pacer 1.1.0-java → 1.1.1-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/pacer/core/array_route.rb +78 -0
- data/lib/pacer/core/graph/path_route.rb +6 -41
- data/lib/pacer/core/hash_route.rb +10 -0
- data/lib/pacer/core/string_route.rb +10 -0
- data/lib/pacer/loader.rb +4 -0
- data/lib/pacer/route_builder.rb +4 -1
- data/lib/pacer/side_effect/aggregate.rb +9 -5
- data/lib/pacer/transform/cap.rb +2 -2
- data/lib/pacer/transform/gather.rb +2 -2
- data/lib/pacer/transform/reduce.rb +165 -0
- data/lib/pacer/version.rb +1 -1
- data/lib/{pacer-1.1.0-standalone.jar → pacer-1.1.1-standalone.jar} +0 -0
- data/pom.xml +1 -1
- data/spec/pacer/transform/path_tree_spec.rb +1 -1
- metadata +6 -2
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
module Pacer
|
|
2
|
+
module Core
|
|
3
|
+
module ArrayRoute
|
|
4
|
+
def help(section = nil)
|
|
5
|
+
case section
|
|
6
|
+
when :arrays
|
|
7
|
+
puts <<HELP
|
|
8
|
+
The following array route methods are available:
|
|
9
|
+
|
|
10
|
+
#lengths Return the length of each array
|
|
11
|
+
|
|
12
|
+
#transpose Route version of Ruby's Array#transpose
|
|
13
|
+
|
|
14
|
+
#compacted Removes nils from each array
|
|
15
|
+
|
|
16
|
+
#heads Route to only the first element from each array
|
|
17
|
+
|
|
18
|
+
#tails Route to only the last element from each array
|
|
19
|
+
|
|
20
|
+
#pairs(head, tail) Route to an array of only the head and tail elements
|
|
21
|
+
head: Number Array index of the : first : element in the pair
|
|
22
|
+
tail: Number : second :
|
|
23
|
+
|
|
24
|
+
#len(n) Filter paths by length
|
|
25
|
+
n: Number | Range
|
|
26
|
+
|
|
27
|
+
HELP
|
|
28
|
+
else
|
|
29
|
+
super
|
|
30
|
+
end
|
|
31
|
+
description
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def lengths
|
|
35
|
+
map(element_type: :integer) { |s| s.length }
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# This could be done more efficiently by reimplementing
|
|
39
|
+
# transpose... Right now it needs 2n memory.
|
|
40
|
+
def transpose
|
|
41
|
+
gather { [] }.
|
|
42
|
+
map(element_type: :array) { |a| a.transpose }.
|
|
43
|
+
scatter(element_type: :array)
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def compacted
|
|
47
|
+
map element_type: element_type, route_name: 'compact' do |a|
|
|
48
|
+
a.compact
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def heads(et = nil)
|
|
53
|
+
map element_type: et, route_name: 'heads' do |a|
|
|
54
|
+
a.first
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def tails(et = nil)
|
|
59
|
+
map element_type: et, route_name: 'tails' do |a|
|
|
60
|
+
a.last
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def pairs(head = 0, tail = -1)
|
|
65
|
+
map element_type: element_type, route_name: "pairs[#{ head },#{ tail }]" do |a|
|
|
66
|
+
[a[head], a[tail]]
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def len(n)
|
|
71
|
+
select do |path|
|
|
72
|
+
n === path.length
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
|
|
@@ -4,9 +4,9 @@ module Pacer::Core::Graph
|
|
|
4
4
|
case section
|
|
5
5
|
when :paths
|
|
6
6
|
puts <<HELP
|
|
7
|
-
The following path helper methods are available:
|
|
7
|
+
The following path-specific route methods are available:
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
See also the :arrays section for more available methods
|
|
10
10
|
|
|
11
11
|
#subgraph(target_graph, opts) Add each element in the path to the graph
|
|
12
12
|
target_graph: PacerGraph (optional) if not specified creates a new TG.
|
|
@@ -18,19 +18,6 @@ The following path helper methods are available:
|
|
|
18
18
|
ignore_missing_vertices: Boolean Squelches the above mentioned exception
|
|
19
19
|
show_missing_vertices: Boolean Complain about missing vertices
|
|
20
20
|
|
|
21
|
-
#compact_paths Removes nils from paths
|
|
22
|
-
|
|
23
|
-
#heads Route to only the first element from each path
|
|
24
|
-
|
|
25
|
-
#tails Route to only the last element from each path
|
|
26
|
-
|
|
27
|
-
#pairs(head, tail) Route to a mini path of only the first and last elements
|
|
28
|
-
head: Number Array index of the : first : element in the pair
|
|
29
|
-
tail: Number : second :
|
|
30
|
-
|
|
31
|
-
#len(n) Filter paths by length
|
|
32
|
-
n: Number | Range
|
|
33
|
-
|
|
34
21
|
#hashify Make a hash of the properties and relationships of the path
|
|
35
22
|
This is just a simple view on the data to facilitate analysis
|
|
36
23
|
|
|
@@ -41,10 +28,6 @@ HELP
|
|
|
41
28
|
description
|
|
42
29
|
end
|
|
43
30
|
|
|
44
|
-
def transpose
|
|
45
|
-
collect { |arraylist| arraylist.to_a }.transpose
|
|
46
|
-
end
|
|
47
|
-
|
|
48
31
|
def subgraph(target_graph = nil, opts = {})
|
|
49
32
|
raise "Can't create a subgraph within itself." if target_graph == graph
|
|
50
33
|
target_graph ||= Pacer.tg
|
|
@@ -82,34 +65,16 @@ HELP
|
|
|
82
65
|
end
|
|
83
66
|
end
|
|
84
67
|
|
|
85
|
-
def
|
|
86
|
-
map
|
|
87
|
-
path.compact
|
|
88
|
-
end
|
|
68
|
+
def transpose
|
|
69
|
+
map(element_type: :array, &:to_a).transpose
|
|
89
70
|
end
|
|
90
71
|
|
|
91
72
|
def heads(et = :vertex)
|
|
92
|
-
|
|
93
|
-
path.first
|
|
94
|
-
end
|
|
73
|
+
super et
|
|
95
74
|
end
|
|
96
75
|
|
|
97
76
|
def tails(et = :vertex)
|
|
98
|
-
|
|
99
|
-
path.last
|
|
100
|
-
end
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
def pairs(head = 0, tail = -1)
|
|
104
|
-
map element_type: :path, route_name: "pairs[#{ head },#{ tail }]" do |path|
|
|
105
|
-
[path[head], path[tail]]
|
|
106
|
-
end
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
def len(n)
|
|
110
|
-
select do |path|
|
|
111
|
-
n === path.length
|
|
112
|
-
end
|
|
77
|
+
super et
|
|
113
78
|
end
|
|
114
79
|
|
|
115
80
|
def hashify
|
data/lib/pacer/loader.rb
CHANGED
|
@@ -19,6 +19,9 @@ require 'pacer/exceptions'
|
|
|
19
19
|
require 'pacer/pipes'
|
|
20
20
|
|
|
21
21
|
require 'pacer/core/route'
|
|
22
|
+
require 'pacer/core/string_route'
|
|
23
|
+
require 'pacer/core/array_route'
|
|
24
|
+
require 'pacer/core/hash_route'
|
|
22
25
|
require 'pacer/core/graph'
|
|
23
26
|
require 'pacer/core/side_effect'
|
|
24
27
|
|
|
@@ -83,6 +86,7 @@ require 'pacer/transform/stream_uniq'
|
|
|
83
86
|
require 'pacer/transform/gather'
|
|
84
87
|
require 'pacer/transform/map'
|
|
85
88
|
require 'pacer/transform/flat_map'
|
|
89
|
+
require 'pacer/transform/reduce'
|
|
86
90
|
require 'pacer/transform/make_pairs'
|
|
87
91
|
require 'pacer/transform/process'
|
|
88
92
|
require 'pacer/transform/join'
|
data/lib/pacer/route_builder.rb
CHANGED
|
@@ -25,7 +25,10 @@ module Pacer
|
|
|
25
25
|
element_types[:vertex] = [Pacer::Core::Graph::ElementRoute, Pacer::Core::Graph::VerticesRoute]
|
|
26
26
|
element_types[:edge] = [Pacer::Core::Graph::ElementRoute, Pacer::Core::Graph::EdgesRoute]
|
|
27
27
|
element_types[:mixed] = [Pacer::Core::Graph::ElementRoute, Pacer::Core::Graph::MixedRoute]
|
|
28
|
-
element_types[:path] = [Pacer::Core::Graph::PathRoute]
|
|
28
|
+
element_types[:path] = [Pacer::Core::ArrayRoute, Pacer::Core::Graph::PathRoute]
|
|
29
|
+
element_types[:string] = [Pacer::Core::StringRoute]
|
|
30
|
+
element_types[:array] = [Pacer::Core::ArrayRoute]
|
|
31
|
+
element_types[:hash] = [Pacer::Core::HashRoute]
|
|
29
32
|
end
|
|
30
33
|
|
|
31
34
|
def chain(source, args)
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
module Pacer
|
|
2
2
|
module Routes::RouteOperations
|
|
3
|
-
def aggregate(into = nil)
|
|
3
|
+
def aggregate(into = nil, &block)
|
|
4
4
|
aggregate = ::Pacer::SideEffect::Aggregate
|
|
5
5
|
r = self
|
|
6
6
|
r = section(into, aggregate::ElementSet) if into.is_a? Symbol
|
|
7
|
+
into = block if block
|
|
7
8
|
r.chain_route :side_effect => aggregate, :into => into
|
|
8
9
|
end
|
|
9
10
|
end
|
|
@@ -25,13 +26,16 @@ module Pacer
|
|
|
25
26
|
protected
|
|
26
27
|
|
|
27
28
|
def attach_pipe(end_pipe)
|
|
28
|
-
|
|
29
|
+
case into
|
|
30
|
+
when Symbol
|
|
29
31
|
hs = vars[into] = HashSet.new
|
|
30
32
|
pipe = AggregatePipe.new hs
|
|
31
|
-
|
|
32
|
-
pipe = AggregatePipe.new into
|
|
33
|
-
|
|
33
|
+
when Proc
|
|
34
|
+
pipe = AggregatePipe.new into.call(self)
|
|
35
|
+
when nil
|
|
34
36
|
pipe = AggregatePipe.new HashSet.new
|
|
37
|
+
else
|
|
38
|
+
pipe = AggregatePipe.new into
|
|
35
39
|
end
|
|
36
40
|
pipe.setStarts end_pipe if end_pipe
|
|
37
41
|
pipe
|
data/lib/pacer/transform/cap.rb
CHANGED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
module Pacer
|
|
2
|
+
module Routes
|
|
3
|
+
module RouteOperations
|
|
4
|
+
def reducer(opts = {}, &block)
|
|
5
|
+
chain_route({transform: :reduce, reduce: block}.merge(opts))
|
|
6
|
+
end
|
|
7
|
+
end
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
module Transform
|
|
11
|
+
module Reduce
|
|
12
|
+
# The goal is to break down the xml stream from being a black
|
|
13
|
+
# box iterator to doing the job in a few steps:
|
|
14
|
+
|
|
15
|
+
def help(section = nil)
|
|
16
|
+
case section
|
|
17
|
+
when nil
|
|
18
|
+
puts <<HELP
|
|
19
|
+
|
|
20
|
+
HELP
|
|
21
|
+
when :example
|
|
22
|
+
puts <<HELP
|
|
23
|
+
This example usage is from pacer-xml plugin v0.2. I transform a raw
|
|
24
|
+
stream of lines from a 79MB file that contains > 4000 concatenated xml
|
|
25
|
+
documents averaging 600 lines each, into a stream of imported nodes:
|
|
26
|
+
|
|
27
|
+
First, a little setup: create a graph, open the file and make a route of
|
|
28
|
+
its lines
|
|
29
|
+
|
|
30
|
+
graph = Pacer.tg
|
|
31
|
+
f = File.open '/tmp/ipgb20120103.xml'
|
|
32
|
+
lines = f.each_line.to_route(element_type: :string).route
|
|
33
|
+
|
|
34
|
+
Create a simple reducer that delimits sections when it hits a DTD tag
|
|
35
|
+
and when it gets to the end of the file (that's the s.nil?), and reduces
|
|
36
|
+
the stream by pushing each section's lines into an array. When a section
|
|
37
|
+
is entered, the initial value is provided by the return value of the
|
|
38
|
+
enter block.
|
|
39
|
+
|
|
40
|
+
reducer = lines.reducer(element_type: :array).route
|
|
41
|
+
reducer.enter { |s| [] if s =~ /<\?xml/ }
|
|
42
|
+
reducer.reduce { |s, lines| lines << s }
|
|
43
|
+
reducer.leave { |s, lines| s.nil? or s =~ /<\?xml/ }
|
|
44
|
+
|
|
45
|
+
Now we're back in the territory of fairly vanilla routes. We join each
|
|
46
|
+
section, use the pacer-xml gem's StringRoute#xml method to parse the XML
|
|
47
|
+
with Nokogiri and then its XmlRoute#import method to turn those XML
|
|
48
|
+
nodes into graph elements.
|
|
49
|
+
|
|
50
|
+
vertex = reducer.map(element_type: :string, &:join).xml.limit(1).import(graph).first
|
|
51
|
+
|
|
52
|
+
graph #=> #<PacerGraph tinkergraph[vertices:88 edges:90]>
|
|
53
|
+
vertex #=> #<V[0] us-patent-grant>
|
|
54
|
+
|
|
55
|
+
We can see that we've now got a graph with 88 vertices and 90 edges.
|
|
56
|
+
|
|
57
|
+
HELP
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
attr_writer :enter, :reduce, :leave
|
|
62
|
+
|
|
63
|
+
def enter(&block)
|
|
64
|
+
if block
|
|
65
|
+
@enter = block
|
|
66
|
+
end
|
|
67
|
+
self
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def reduce(&block)
|
|
71
|
+
if block
|
|
72
|
+
@reduce = block
|
|
73
|
+
end
|
|
74
|
+
self
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def leave(same_as = nil, &block)
|
|
78
|
+
if same_as == :enter
|
|
79
|
+
@leave = @enter
|
|
80
|
+
elsif block
|
|
81
|
+
@leave = block
|
|
82
|
+
end
|
|
83
|
+
self
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def attach_pipe(end_pipe)
|
|
87
|
+
if @enter and @reduce and @leave
|
|
88
|
+
pipe = ReducerPipe.new self, @enter, @reduce, @leave
|
|
89
|
+
pipe.setStarts end_pipe
|
|
90
|
+
pipe
|
|
91
|
+
else
|
|
92
|
+
fail Pacer::ClientError, 'enter, reduce, and leave must all be specified for reducers'
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
class ReducerPipe < Pacer::Pipes::RubyPipe
|
|
97
|
+
attr_reader :enter, :reduce, :leave, :change_value
|
|
98
|
+
attr_accessor :changed_value, :value_changed
|
|
99
|
+
attr_accessor :next_value
|
|
100
|
+
|
|
101
|
+
def initialize(back, enter, reduce, leave)
|
|
102
|
+
super()
|
|
103
|
+
@change_value = proc do |new_value|
|
|
104
|
+
self.changed_value = new_value
|
|
105
|
+
self.value_changed = true
|
|
106
|
+
end
|
|
107
|
+
@enter = Pacer::Wrappers::WrappingPipeFunction.new back, enter
|
|
108
|
+
@reduce = Pacer::Wrappers::WrappingPipeFunction.new back, reduce
|
|
109
|
+
@leave = Pacer::Wrappers::WrappingPipeFunction.new back, leave
|
|
110
|
+
@next_value = nil
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def processNextStart
|
|
114
|
+
if next_value
|
|
115
|
+
collecting = true
|
|
116
|
+
value = next_value
|
|
117
|
+
self.next_value = nil
|
|
118
|
+
else
|
|
119
|
+
collecting = false
|
|
120
|
+
end
|
|
121
|
+
leaving = false
|
|
122
|
+
final_value = nil
|
|
123
|
+
while raw_element = starts.next
|
|
124
|
+
if collecting
|
|
125
|
+
if leave.call_with_args(raw_element, value, change_value)
|
|
126
|
+
leaving = true
|
|
127
|
+
return_value = final_value(value)
|
|
128
|
+
collecting = false
|
|
129
|
+
else
|
|
130
|
+
value = reduce.call_with_args(raw_element, value)
|
|
131
|
+
end
|
|
132
|
+
end
|
|
133
|
+
if not collecting
|
|
134
|
+
value = enter.call raw_element
|
|
135
|
+
if value
|
|
136
|
+
collecting = true
|
|
137
|
+
value = reduce.call_with_args(raw_element, value)
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
if leaving
|
|
141
|
+
self.next_value = value if collecting
|
|
142
|
+
return return_value
|
|
143
|
+
end
|
|
144
|
+
end
|
|
145
|
+
rescue Pacer::EmptyPipe, java.util.NoSuchElementException
|
|
146
|
+
if collecting and leave.call_with_args(nil, value, change_value)
|
|
147
|
+
return final_value(value)
|
|
148
|
+
end
|
|
149
|
+
raise EmptyPipe.instance
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
private
|
|
153
|
+
|
|
154
|
+
def final_value(value)
|
|
155
|
+
if value_changed
|
|
156
|
+
self.value_changed = false
|
|
157
|
+
value = changed_value
|
|
158
|
+
self.changed_value = nil
|
|
159
|
+
end
|
|
160
|
+
value
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
end
|
|
165
|
+
end
|
data/lib/pacer/version.rb
CHANGED
|
Binary file
|
data/pom.xml
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
<artifactId>pacer</artifactId>
|
|
8
8
|
<!-- NOTE: the following properties are automatically updated based on the values in lib/pacer-neo4j/version.rb -->
|
|
9
9
|
<properties>
|
|
10
|
-
<gem.version>1.1.0</gem.version>
|
|
10
|
+
<gem.version>1.1.1</gem.version>
|
|
11
11
|
<blueprints.version>2.1.0</blueprints.version>
|
|
12
12
|
<pipes.version>2.1.0</pipes.version>
|
|
13
13
|
<gremlin.version>2.1.0</gremlin.version>
|
metadata
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
name: pacer
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease:
|
|
5
|
-
version: 1.1.0
|
|
5
|
+
version: 1.1.1
|
|
6
6
|
platform: java
|
|
7
7
|
authors:
|
|
8
8
|
- Darrick Wiebe
|
|
@@ -40,6 +40,7 @@ files:
|
|
|
40
40
|
- lib/pacer/blueprints/payload_elements.rb
|
|
41
41
|
- lib/pacer/blueprints/ruby_graph.rb
|
|
42
42
|
- lib/pacer/blueprints/tg.rb
|
|
43
|
+
- lib/pacer/core/array_route.rb
|
|
43
44
|
- lib/pacer/core/graph.rb
|
|
44
45
|
- lib/pacer/core/graph/edges_route.rb
|
|
45
46
|
- lib/pacer/core/graph/element_route.rb
|
|
@@ -48,8 +49,10 @@ files:
|
|
|
48
49
|
- lib/pacer/core/graph/mixed_route.rb
|
|
49
50
|
- lib/pacer/core/graph/path_route.rb
|
|
50
51
|
- lib/pacer/core/graph/vertices_route.rb
|
|
52
|
+
- lib/pacer/core/hash_route.rb
|
|
51
53
|
- lib/pacer/core/route.rb
|
|
52
54
|
- lib/pacer/core/side_effect.rb
|
|
55
|
+
- lib/pacer/core/string_route.rb
|
|
53
56
|
- lib/pacer/exceptions.rb
|
|
54
57
|
- lib/pacer/filter/block_filter.rb
|
|
55
58
|
- lib/pacer/filter/collection_filter.rb
|
|
@@ -132,6 +135,7 @@ files:
|
|
|
132
135
|
- lib/pacer/transform/path_tree.rb
|
|
133
136
|
- lib/pacer/transform/payload.rb
|
|
134
137
|
- lib/pacer/transform/process.rb
|
|
138
|
+
- lib/pacer/transform/reduce.rb
|
|
135
139
|
- lib/pacer/transform/scatter.rb
|
|
136
140
|
- lib/pacer/transform/sort_section.rb
|
|
137
141
|
- lib/pacer/transform/stream_sort.rb
|
|
@@ -212,7 +216,7 @@ files:
|
|
|
212
216
|
- spec/support/use_transactions.rb
|
|
213
217
|
- spec/tackle/simple_mixin.rb
|
|
214
218
|
- spec/tackle/tinkerpop_graph_mixins.rb
|
|
215
|
-
- lib/pacer-1.1.0-standalone.jar
|
|
219
|
+
- lib/pacer-1.1.1-standalone.jar
|
|
216
220
|
homepage: http://github.com/pangloss/pacer
|
|
217
221
|
licenses:
|
|
218
222
|
- MIT
|