gqlite 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/Cargo.toml +1 -0
- data/ext/gqlitedb/Cargo.toml +2 -2
- data/ext/gqlitedb/src/compiler.rs +30 -19
- data/ext/gqlitedb/src/functions/containers.rs +1 -1
- data/ext/gqlitedb/src/functions/path.rs +1 -1
- data/ext/gqlitedb/src/functions/scalar.rs +23 -0
- data/ext/gqlitedb/src/functions.rs +8 -0
- data/ext/gqlitedb/src/interpreter/evaluators.rs +5 -8
- data/ext/gqlitedb/src/interpreter/instructions.rs +2 -2
- data/ext/gqlitedb/src/lib.rs +0 -2
- data/ext/gqlitedb/src/tests/templates/programs.rs +10 -10
- data/ext/gqliterb/src/lib.rs +0 -2
- data/ext/graphcore/Cargo.toml +1 -1
- data/ext/graphcore/src/lib.rs +0 -1
- data/ext/graphcore/src/table.rs +1 -1
- metadata +3 -10
- data/ext/gqlitedb/gqlite_bench_data/README.MD +0 -6
- data/ext/gqlitedb/gqlite_bench_data/scripts/generate_smaller_pokec.rb +0 -85
- data/ext/gqlitedb/gqlite_bench_data/scripts/to_efficient_pokec.rb +0 -34
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 38e53ea482654edda2dec42d18ba7aff7e5660f5643f80f66dfd966e21415ebd
|
|
4
|
+
data.tar.gz: ce0cb6a21adbca63605e86d753a3c20f281a2126601c5225c446d161efe97094
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f1b60e9c256da4300381ef244158ea61a464529e7518dfe1ee532c0e5ada468860ea08a5c0ab91304b00b5e09590774131fd7487d0bb79d2435d7c13009bd0e6
|
|
7
|
+
data.tar.gz: 4009f247ebcdc9519bfa40853b5ab8b32ab3c53fd365cf6f9dd3a25e395596dc70a2c12f7e795496695d8227428f37edfb1375cbcc59ac35e71a76ba973bd318
|
data/ext/Cargo.toml
CHANGED
data/ext/gqlitedb/Cargo.toml
CHANGED
|
@@ -30,7 +30,7 @@ bundled = ["rusqlite/bundled"]
|
|
|
30
30
|
graphcore = { workspace = true }
|
|
31
31
|
|
|
32
32
|
askama = { workspace = true, optional = true }
|
|
33
|
-
ccutils = {
|
|
33
|
+
ccutils = { workspace = true, features = ["alias", "pool", "sync"] }
|
|
34
34
|
ciborium = "0.2"
|
|
35
35
|
itertools = { workspace = true }
|
|
36
36
|
pgrx = { version = "0.16", optional = true }
|
|
@@ -49,7 +49,7 @@ thiserror = { workspace = true }
|
|
|
49
49
|
uuid = { workspace = true }
|
|
50
50
|
|
|
51
51
|
[dev-dependencies]
|
|
52
|
-
ccutils = {
|
|
52
|
+
ccutils = { workspace = true, features = ["alias", "temporary"] }
|
|
53
53
|
divan = "0.1"
|
|
54
54
|
iai-callgrind = { version = "0.16" }
|
|
55
55
|
regex = "1"
|
|
@@ -19,6 +19,14 @@ macro_rules! compile_binary_op {
|
|
|
19
19
|
};
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
+
struct CompiledReturnWith
|
|
23
|
+
{
|
|
24
|
+
variables: Vec<(String, RWExpression)>,
|
|
25
|
+
filter: Instructions,
|
|
26
|
+
modifiers: Modifiers,
|
|
27
|
+
variables_sizes: VariablesSizes,
|
|
28
|
+
}
|
|
29
|
+
|
|
22
30
|
struct Compiler
|
|
23
31
|
{
|
|
24
32
|
function_manager: functions::Manager,
|
|
@@ -701,19 +709,13 @@ impl Compiler
|
|
|
701
709
|
})
|
|
702
710
|
}
|
|
703
711
|
|
|
704
|
-
#[allow(clippy::type_complexity)]
|
|
705
712
|
fn compile_return_with(
|
|
706
713
|
&mut self,
|
|
707
714
|
all: bool,
|
|
708
715
|
expressions: &[ast::NamedExpression],
|
|
709
716
|
where_expression: &Option<ast::Expression>,
|
|
710
717
|
modifiers: &ast::Modifiers,
|
|
711
|
-
) -> Result<
|
|
712
|
-
Vec<(String, RWExpression)>,
|
|
713
|
-
Instructions,
|
|
714
|
-
Modifiers,
|
|
715
|
-
VariablesSizes,
|
|
716
|
-
)>
|
|
718
|
+
) -> Result<CompiledReturnWith>
|
|
717
719
|
{
|
|
718
720
|
let mut variables = Vec::<(ast::VariableIdentifier, RWExpression)>::new();
|
|
719
721
|
let mut filter = Default::default();
|
|
@@ -775,7 +777,7 @@ impl Compiler
|
|
|
775
777
|
}
|
|
776
778
|
|
|
777
779
|
let modifiers = self.compile_modifiers(modifiers)?;
|
|
778
|
-
let
|
|
780
|
+
let variables_sizes = self.variables_size();
|
|
779
781
|
self
|
|
780
782
|
.variables_manager
|
|
781
783
|
.keep_variables(variables.iter().map(|(n, _)| n))?;
|
|
@@ -784,7 +786,12 @@ impl Compiler
|
|
|
784
786
|
.into_iter()
|
|
785
787
|
.map(|(var_id, e)| (var_id.take_name(), e))
|
|
786
788
|
.collect();
|
|
787
|
-
Ok(
|
|
789
|
+
Ok(CompiledReturnWith {
|
|
790
|
+
variables,
|
|
791
|
+
filter,
|
|
792
|
+
modifiers,
|
|
793
|
+
variables_sizes,
|
|
794
|
+
})
|
|
788
795
|
}
|
|
789
796
|
|
|
790
797
|
fn compile_match_patterns(
|
|
@@ -951,17 +958,17 @@ pub(crate) fn compile(
|
|
|
951
958
|
),
|
|
952
959
|
ast::Statement::Return(return_statement) =>
|
|
953
960
|
{
|
|
954
|
-
let
|
|
961
|
+
let compiled_return_with = compiler.compile_return_with(
|
|
955
962
|
return_statement.all,
|
|
956
963
|
&return_statement.expressions,
|
|
957
964
|
&return_statement.where_expression,
|
|
958
965
|
&return_statement.modifiers,
|
|
959
966
|
)?;
|
|
960
967
|
Ok(Block::Return {
|
|
961
|
-
variables,
|
|
962
|
-
filter,
|
|
963
|
-
modifiers,
|
|
964
|
-
|
|
968
|
+
variables: compiled_return_with.variables,
|
|
969
|
+
filter: compiled_return_with.filter,
|
|
970
|
+
modifiers: compiled_return_with.modifiers,
|
|
971
|
+
variables_sizes: compiled_return_with.variables_sizes,
|
|
965
972
|
})
|
|
966
973
|
}
|
|
967
974
|
ast::Statement::Call(call) =>
|
|
@@ -978,17 +985,21 @@ pub(crate) fn compile(
|
|
|
978
985
|
}
|
|
979
986
|
ast::Statement::With(with) =>
|
|
980
987
|
{
|
|
981
|
-
let
|
|
988
|
+
let compiled_return_with = compiler.compile_return_with(
|
|
982
989
|
with.all,
|
|
983
990
|
&with.expressions,
|
|
984
991
|
&with.where_expression,
|
|
985
992
|
&with.modifiers,
|
|
986
993
|
)?;
|
|
987
994
|
Ok(Block::With {
|
|
988
|
-
variables:
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
995
|
+
variables: compiled_return_with
|
|
996
|
+
.variables
|
|
997
|
+
.into_iter()
|
|
998
|
+
.map(|(_, v)| v)
|
|
999
|
+
.collect(),
|
|
1000
|
+
filter: compiled_return_with.filter,
|
|
1001
|
+
modifiers: compiled_return_with.modifiers,
|
|
1002
|
+
variables_sizes: compiled_return_with.variables_sizes,
|
|
992
1003
|
})
|
|
993
1004
|
}
|
|
994
1005
|
ast::Statement::Unwind(unwind) =>
|
|
@@ -10,7 +10,7 @@ impl super::FunctionTrait for Length
|
|
|
10
10
|
{
|
|
11
11
|
let container = arguments
|
|
12
12
|
.first()
|
|
13
|
-
.
|
|
13
|
+
.ok_or(RunTimeError::InvalidNumberOfArguments {
|
|
14
14
|
function_name: "length",
|
|
15
15
|
got: arguments.len(),
|
|
16
16
|
expected: 1,
|
|
@@ -32,6 +32,29 @@ impl super::FunctionTrait for Coalesce
|
|
|
32
32
|
|
|
33
33
|
super::declare_function!(coalesce, Coalesce, custom_trait);
|
|
34
34
|
|
|
35
|
+
#[derive(Debug, Default)]
|
|
36
|
+
pub(super) struct Id {}
|
|
37
|
+
|
|
38
|
+
impl Id
|
|
39
|
+
{
|
|
40
|
+
fn call_impl(value: &value::Value) -> FResult<graph::Key>
|
|
41
|
+
{
|
|
42
|
+
match value
|
|
43
|
+
{
|
|
44
|
+
value::Value::Node(n) => Ok(n.key()),
|
|
45
|
+
value::Value::Edge(e) => Ok(e.key()),
|
|
46
|
+
_ => Err(RunTimeError::InvalidArgument {
|
|
47
|
+
function_name: "id",
|
|
48
|
+
index: 0,
|
|
49
|
+
expected_type: "node or edge",
|
|
50
|
+
value: format!("{:?}", value),
|
|
51
|
+
}),
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
super::declare_function!(id, Id, call_impl(crate::value::Value) -> graph::Key);
|
|
57
|
+
|
|
35
58
|
#[derive(Debug, Default)]
|
|
36
59
|
pub(super) struct ToInteger {}
|
|
37
60
|
|
|
@@ -59,6 +59,13 @@ impl FunctionTypeTrait for f64
|
|
|
59
59
|
}
|
|
60
60
|
}
|
|
61
61
|
|
|
62
|
+
impl FunctionTypeTrait for graph::Key
|
|
63
|
+
{
|
|
64
|
+
fn result_type() -> ExpressionType
|
|
65
|
+
{
|
|
66
|
+
ExpressionType::Key
|
|
67
|
+
}
|
|
68
|
+
}
|
|
62
69
|
impl<T> FunctionTypeTrait for Vec<T>
|
|
63
70
|
{
|
|
64
71
|
fn result_type() -> ExpressionType
|
|
@@ -146,6 +153,7 @@ impl Manager
|
|
|
146
153
|
path::Nodes::create(),
|
|
147
154
|
path::Edges::create(),
|
|
148
155
|
scalar::Coalesce::create(),
|
|
156
|
+
scalar::Id::create(),
|
|
149
157
|
scalar::Properties::create(),
|
|
150
158
|
scalar::ToInteger::create(),
|
|
151
159
|
string::ToString::create(),
|
|
@@ -950,7 +950,6 @@ fn compute_return_with_table(
|
|
|
950
950
|
for row in input_table.into_row_iter()
|
|
951
951
|
{
|
|
952
952
|
// a) compute non-aggregated columns
|
|
953
|
-
let mut row = row.extended(variables_sizes.total_size())?;
|
|
954
953
|
let out_row = variables
|
|
955
954
|
.iter()
|
|
956
955
|
.map(|rw_expr| {
|
|
@@ -959,9 +958,7 @@ fn compute_return_with_table(
|
|
|
959
958
|
assert_eq!(rw_expr.aggregations.len(), 0);
|
|
960
959
|
let mut stack = Stack::default();
|
|
961
960
|
eval_instructions(&mut stack, &row, &rw_expr.instructions, parameters)?;
|
|
962
|
-
|
|
963
|
-
row.set(rw_expr.col_id, value.to_owned())?;
|
|
964
|
-
Ok(value)
|
|
961
|
+
stack.try_pop_into()
|
|
965
962
|
}
|
|
966
963
|
else
|
|
967
964
|
{
|
|
@@ -1039,12 +1036,12 @@ fn compute_return_with_table(
|
|
|
1039
1036
|
output_table = input_table
|
|
1040
1037
|
.into_row_iter()
|
|
1041
1038
|
.map(|row| {
|
|
1042
|
-
let mut out_row = row.extended(variables_sizes.total_size())?;
|
|
1039
|
+
let mut out_row = row.clone().extended(variables_sizes.total_size())?;
|
|
1043
1040
|
for rw_expr in variables.iter()
|
|
1044
1041
|
{
|
|
1045
1042
|
assert_eq!(rw_expr.aggregations.len(), 0);
|
|
1046
1043
|
let mut stack = Stack::default();
|
|
1047
|
-
eval_instructions(&mut stack, &
|
|
1044
|
+
eval_instructions(&mut stack, &row, &rw_expr.instructions, parameters)?;
|
|
1048
1045
|
let value: value::Value = stack.try_pop_into()?;
|
|
1049
1046
|
out_row.set(rw_expr.col_id, value.to_owned())?;
|
|
1050
1047
|
}
|
|
@@ -1429,7 +1426,7 @@ pub(crate) fn eval_program<TStore: store::Store>(
|
|
|
1429
1426
|
variables,
|
|
1430
1427
|
filter,
|
|
1431
1428
|
modifiers,
|
|
1432
|
-
variables_size,
|
|
1429
|
+
variables_sizes: variables_size,
|
|
1433
1430
|
} =>
|
|
1434
1431
|
{
|
|
1435
1432
|
let (names, variables): (Vec<_>, Vec<_>) = variables.iter().map(|(s, e)| (s, e)).unzip();
|
|
@@ -1454,7 +1451,7 @@ pub(crate) fn eval_program<TStore: store::Store>(
|
|
|
1454
1451
|
variables,
|
|
1455
1452
|
filter,
|
|
1456
1453
|
modifiers,
|
|
1457
|
-
variables_size,
|
|
1454
|
+
variables_sizes: variables_size,
|
|
1458
1455
|
} =>
|
|
1459
1456
|
{
|
|
1460
1457
|
input_table = compute_return_with_table(
|
|
@@ -234,7 +234,7 @@ pub(crate) enum Block
|
|
|
234
234
|
variables: Vec<(String, RWExpression)>,
|
|
235
235
|
filter: Instructions,
|
|
236
236
|
modifiers: Modifiers,
|
|
237
|
-
|
|
237
|
+
variables_sizes: VariablesSizes,
|
|
238
238
|
},
|
|
239
239
|
Call
|
|
240
240
|
{
|
|
@@ -247,7 +247,7 @@ pub(crate) enum Block
|
|
|
247
247
|
variables: Vec<RWExpression>,
|
|
248
248
|
filter: Instructions,
|
|
249
249
|
modifiers: Modifiers,
|
|
250
|
-
|
|
250
|
+
variables_sizes: VariablesSizes,
|
|
251
251
|
},
|
|
252
252
|
Unwind
|
|
253
253
|
{
|
data/ext/gqlitedb/src/lib.rs
CHANGED
|
@@ -71,7 +71,7 @@ pub(crate) fn create_named_node() -> Program
|
|
|
71
71
|
skip: None,
|
|
72
72
|
order_by: vec![],
|
|
73
73
|
},
|
|
74
|
-
|
|
74
|
+
variables_sizes: create_variable_size(2, 0),
|
|
75
75
|
},
|
|
76
76
|
]
|
|
77
77
|
}
|
|
@@ -133,7 +133,7 @@ pub(crate) fn create_named_node_double_return() -> Program
|
|
|
133
133
|
skip: None,
|
|
134
134
|
order_by: vec![],
|
|
135
135
|
},
|
|
136
|
-
|
|
136
|
+
variables_sizes: create_variable_size(3, 0),
|
|
137
137
|
},
|
|
138
138
|
]
|
|
139
139
|
}
|
|
@@ -161,7 +161,7 @@ pub(crate) fn double_with_return() -> Program
|
|
|
161
161
|
skip: None,
|
|
162
162
|
order_by: vec![],
|
|
163
163
|
},
|
|
164
|
-
|
|
164
|
+
variables_sizes: create_variable_size(2, 0),
|
|
165
165
|
},
|
|
166
166
|
Block::With {
|
|
167
167
|
variables: vec![
|
|
@@ -182,7 +182,7 @@ pub(crate) fn double_with_return() -> Program
|
|
|
182
182
|
skip: None,
|
|
183
183
|
order_by: vec![],
|
|
184
184
|
},
|
|
185
|
-
|
|
185
|
+
variables_sizes: create_variable_size(4, 0),
|
|
186
186
|
},
|
|
187
187
|
Block::Return {
|
|
188
188
|
variables: vec![(
|
|
@@ -199,7 +199,7 @@ pub(crate) fn double_with_return() -> Program
|
|
|
199
199
|
skip: None,
|
|
200
200
|
order_by: vec![],
|
|
201
201
|
},
|
|
202
|
-
|
|
202
|
+
variables_sizes: create_variable_size(2, 0),
|
|
203
203
|
},
|
|
204
204
|
]
|
|
205
205
|
}
|
|
@@ -234,7 +234,7 @@ pub(crate) fn unwind() -> Program
|
|
|
234
234
|
skip: None,
|
|
235
235
|
order_by: vec![],
|
|
236
236
|
},
|
|
237
|
-
|
|
237
|
+
variables_sizes: create_variable_size(1, 0),
|
|
238
238
|
},
|
|
239
239
|
]
|
|
240
240
|
}
|
|
@@ -288,7 +288,7 @@ pub(crate) fn match_loop() -> Program
|
|
|
288
288
|
skip: None,
|
|
289
289
|
order_by: vec![],
|
|
290
290
|
},
|
|
291
|
-
|
|
291
|
+
variables_sizes: create_variable_size(1, 0),
|
|
292
292
|
},
|
|
293
293
|
]
|
|
294
294
|
}
|
|
@@ -327,7 +327,7 @@ pub(crate) fn optional_match() -> Program
|
|
|
327
327
|
skip: None,
|
|
328
328
|
order_by: vec![],
|
|
329
329
|
},
|
|
330
|
-
|
|
330
|
+
variables_sizes: create_variable_size(1, 0),
|
|
331
331
|
},
|
|
332
332
|
]
|
|
333
333
|
}
|
|
@@ -378,7 +378,7 @@ pub(crate) fn match_count(function_manager: &functions::Manager) -> Program
|
|
|
378
378
|
skip: None,
|
|
379
379
|
order_by: vec![],
|
|
380
380
|
},
|
|
381
|
-
|
|
381
|
+
variables_sizes: create_variable_size(2, 1),
|
|
382
382
|
},
|
|
383
383
|
]
|
|
384
384
|
}
|
|
@@ -449,7 +449,7 @@ pub(crate) fn aggregation(function_manager: &functions::Manager) -> Program
|
|
|
449
449
|
skip: None,
|
|
450
450
|
order_by: vec![],
|
|
451
451
|
},
|
|
452
|
-
|
|
452
|
+
variables_sizes: create_variable_size(3, 1),
|
|
453
453
|
},
|
|
454
454
|
]
|
|
455
455
|
}
|
data/ext/gqliterb/src/lib.rs
CHANGED
data/ext/graphcore/Cargo.toml
CHANGED
data/ext/graphcore/src/lib.rs
CHANGED
data/ext/graphcore/src/table.rs
CHANGED
|
@@ -15,7 +15,7 @@ impl Table
|
|
|
15
15
|
/// of row is equal to length of data divided by number of columns.
|
|
16
16
|
pub fn new(headers: Vec<String>, data: Vec<crate::Value>) -> Result<Table>
|
|
17
17
|
{
|
|
18
|
-
if data.len()
|
|
18
|
+
if !data.len().is_multiple_of(headers.len())
|
|
19
19
|
{
|
|
20
20
|
Err(error::Error::InvalidTableDimensions)?;
|
|
21
21
|
}
|
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: gqlite
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.3.
|
|
4
|
+
version: 1.3.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Cyrille Berger
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: rb_sys
|
|
@@ -56,7 +55,6 @@ description: "GQLite is a Rust-language library, with a C interface, that implem
|
|
|
56
55
|
=> ex\n # Report any error\n puts \"An error has occured: #{ex.message}\"\nend\n\n```\n\nThe
|
|
57
56
|
documentation for the GQL query language can be found in [OpenCypher](https://auksys.org/documentation/5/libraries/gqlite/opencypher/)
|
|
58
57
|
and for the [API](https://auksys.org/documentation/5/libraries/gqlite/api/).\n\n"
|
|
59
|
-
email:
|
|
60
58
|
executables: []
|
|
61
59
|
extensions:
|
|
62
60
|
- ext/gqliterb/extconf.rb
|
|
@@ -69,9 +67,6 @@ files:
|
|
|
69
67
|
- ext/gqlitedb/benches/common/pokec.rs
|
|
70
68
|
- ext/gqlitedb/benches/pokec_divan.rs
|
|
71
69
|
- ext/gqlitedb/benches/pokec_iai.rs
|
|
72
|
-
- ext/gqlitedb/gqlite_bench_data/README.MD
|
|
73
|
-
- ext/gqlitedb/gqlite_bench_data/scripts/generate_smaller_pokec.rb
|
|
74
|
-
- ext/gqlitedb/gqlite_bench_data/scripts/to_efficient_pokec.rb
|
|
75
70
|
- ext/gqlitedb/release.toml
|
|
76
71
|
- ext/gqlitedb/src/aggregators.rs
|
|
77
72
|
- ext/gqlitedb/src/aggregators/arithmetic.rs
|
|
@@ -162,7 +157,6 @@ homepage: https://gitlab.com/auksys/gqlite
|
|
|
162
157
|
licenses:
|
|
163
158
|
- MIT
|
|
164
159
|
metadata: {}
|
|
165
|
-
post_install_message:
|
|
166
160
|
rdoc_options: []
|
|
167
161
|
require_paths:
|
|
168
162
|
- lib
|
|
@@ -177,8 +171,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
177
171
|
- !ruby/object:Gem::Version
|
|
178
172
|
version: '0'
|
|
179
173
|
requirements: []
|
|
180
|
-
rubygems_version: 3.
|
|
181
|
-
signing_key:
|
|
174
|
+
rubygems_version: 3.6.7
|
|
182
175
|
specification_version: 4
|
|
183
176
|
summary: Ruby bindings for GQLite, a Graph Query library.
|
|
184
177
|
test_files: []
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
 GQLite Benchmark Data
|
|
2
|
-
==============================================
|
|
3
|
-
|
|
4
|
-
This repository includes data used to benchmark GQLite.
|
|
5
|
-
|
|
6
|
-
* `pokec_tiny_nodes.cypher` is a subset of the [pokec mini](https://github.com/memgraph/memgraph/tree/master/tests/mgbench#pokec) dataset ([original](https://snap.stanford.edu/data/soc-Pokec.html)).
|
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
# Script that reduces the pokec dataset to a subset of users.
|
|
2
|
-
# It also reduces the initial set of queries into a single one.
|
|
3
|
-
#
|
|
4
|
-
# Use as:
|
|
5
|
-
# ```bash
|
|
6
|
-
# ruby generate_smaller_pokec.rb size input.cypher > output.cypher
|
|
7
|
-
# ```
|
|
8
|
-
|
|
9
|
-
require 'set'
|
|
10
|
-
|
|
11
|
-
size = ARGV[0] or abort("Usage: ruby #{$0} micro/tiny input.cypher")
|
|
12
|
-
input_file = ARGV[1] or abort("Usage: ruby #{$0} micro/tiny input.cypher")
|
|
13
|
-
|
|
14
|
-
if size == "tiny"
|
|
15
|
-
initial = 5
|
|
16
|
-
increments = 2
|
|
17
|
-
elsif size == "micro"
|
|
18
|
-
initial = 1
|
|
19
|
-
increments = 1
|
|
20
|
-
else
|
|
21
|
-
abort("Unknown size '#{size}', should be 'tiny' or 'micro'")
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
user_lines = {}
|
|
25
|
-
all_edges = []
|
|
26
|
-
edges_by_user = Hash.new { |h, k| h[k] = [] }
|
|
27
|
-
selected_edges = []
|
|
28
|
-
seen_edges = Set.new
|
|
29
|
-
selected_users = Set.new
|
|
30
|
-
|
|
31
|
-
def edge_key(id1, id2)
|
|
32
|
-
[id1, id2].sort.join('-')
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
File.foreach(input_file) do |line|
|
|
36
|
-
if line =~ /CREATE\s+\(:User\s*(\{.*id:\s*(\d+)[^}]*\})\)/i
|
|
37
|
-
user_lines[$2.to_i] = $1
|
|
38
|
-
|
|
39
|
-
elsif line =~ /MATCH\s+\(n:User\s*\{id:\s*(\d+)\}\),\s*\(m:User\s*\{id:\s*(\d+)\}\).*CREATE.*Friend/i
|
|
40
|
-
id1, id2 = $1.to_i, $2.to_i
|
|
41
|
-
edge = { line: line, ids: [id1, id2] }
|
|
42
|
-
all_edges << edge
|
|
43
|
-
edges_by_user[id1] << edge
|
|
44
|
-
edges_by_user[id2] << edge
|
|
45
|
-
end
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
# Step 1: Select initial edges
|
|
49
|
-
initial_edges = all_edges.take(initial)
|
|
50
|
-
initial_edges.each do |edge|
|
|
51
|
-
k = edge_key(*edge[:ids])
|
|
52
|
-
next if seen_edges.include?(k)
|
|
53
|
-
|
|
54
|
-
seen_edges << k
|
|
55
|
-
selected_edges << edge
|
|
56
|
-
selected_users.merge(edge[:ids])
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
# Step 2: Find all other edges connected to selected users
|
|
60
|
-
for i in 0...increments
|
|
61
|
-
for uid in selected_users.clone()
|
|
62
|
-
edges_by_user[uid].each do |edge|
|
|
63
|
-
k = edge_key(*edge[:ids])
|
|
64
|
-
next if seen_edges.include?(k)
|
|
65
|
-
|
|
66
|
-
seen_edges << k
|
|
67
|
-
selected_edges << edge
|
|
68
|
-
|
|
69
|
-
# Add any new user ID to the queue
|
|
70
|
-
selected_users.merge(edge[:ids])
|
|
71
|
-
end
|
|
72
|
-
end
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
puts "CREATE"
|
|
76
|
-
# Output user CREATEs
|
|
77
|
-
selected_users.each do |id|
|
|
78
|
-
puts " (user_#{id}:User #{user_lines[id]})," if user_lines.key?(id)
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
# Output edges
|
|
82
|
-
selected_edges.each do |edge|
|
|
83
|
-
ids = edge[:ids]
|
|
84
|
-
puts " (user_#{ids[0]})-[:Friend]->(user_#{ids[1]}),"
|
|
85
|
-
end
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
# Script that transforms the pokec dataset into a more efficient query.
|
|
2
|
-
#
|
|
3
|
-
# Use as:
|
|
4
|
-
# ```bash
|
|
5
|
-
# ruby to_efficient_pokec.rb input.cypher > output.cypher
|
|
6
|
-
# ```
|
|
7
|
-
|
|
8
|
-
require 'set'
|
|
9
|
-
|
|
10
|
-
input_file = ARGV[0] or abort("Usage: ruby #{$0} input.cypher")
|
|
11
|
-
|
|
12
|
-
user_lines = []
|
|
13
|
-
edge_lines = []
|
|
14
|
-
|
|
15
|
-
File.foreach(input_file) do |line|
|
|
16
|
-
if line =~ /CREATE\s+\(:User\s*(\{.*id:\s*(\d+)[^}]*\})\)/i
|
|
17
|
-
user_lines.append({ id: $2.to_i, data: $1})
|
|
18
|
-
|
|
19
|
-
elsif line =~ /MATCH\s+\(n:User\s*\{id:\s*(\d+)\}\),\s*\(m:User\s*\{id:\s*(\d+)\}\).*CREATE.*Friend/i
|
|
20
|
-
id1, id2 = $1.to_i, $2.to_i
|
|
21
|
-
edge_lines.append({ id1: id1, id2: id2 })
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
puts "CREATE"
|
|
26
|
-
# Output user CREATEs
|
|
27
|
-
user_lines.each do |user|
|
|
28
|
-
puts " (user_#{user[:id]}:User #{user[:data]}),"
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
# Output edges
|
|
32
|
-
edge_lines.each do |edge|
|
|
33
|
-
puts " (user_#{edge[:id1]})-[:Friend]->(user_#{edge[:id2]}),"
|
|
34
|
-
end
|