pacer 1.1.0-java → 1.1.1-java
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/pacer/core/array_route.rb +78 -0
- data/lib/pacer/core/graph/path_route.rb +6 -41
- data/lib/pacer/core/hash_route.rb +10 -0
- data/lib/pacer/core/string_route.rb +10 -0
- data/lib/pacer/loader.rb +4 -0
- data/lib/pacer/route_builder.rb +4 -1
- data/lib/pacer/side_effect/aggregate.rb +9 -5
- data/lib/pacer/transform/cap.rb +2 -2
- data/lib/pacer/transform/gather.rb +2 -2
- data/lib/pacer/transform/reduce.rb +165 -0
- data/lib/pacer/version.rb +1 -1
- data/lib/{pacer-1.1.0-standalone.jar → pacer-1.1.1-standalone.jar} +0 -0
- data/pom.xml +1 -1
- data/spec/pacer/transform/path_tree_spec.rb +1 -1
- metadata +6 -2
@@ -0,0 +1,78 @@
|
|
1
|
+
module Pacer
|
2
|
+
module Core
|
3
|
+
module ArrayRoute
|
4
|
+
def help(section = nil)
|
5
|
+
case section
|
6
|
+
when :arrays
|
7
|
+
puts <<HELP
|
8
|
+
The following array route methods are available:
|
9
|
+
|
10
|
+
#lengths Return the length of each array
|
11
|
+
|
12
|
+
#transpose Route version of Ruby's Array#transpose
|
13
|
+
|
14
|
+
#compacted Removes nils from each array
|
15
|
+
|
16
|
+
#heads Route to only the first element from each array
|
17
|
+
|
18
|
+
#tails Route to only the last element from each array
|
19
|
+
|
20
|
+
#pairs(head, tail) Route to an array of only the head and tail elements
|
21
|
+
head: Number Array index of the : first : element in the pair
|
22
|
+
tail: Number : second :
|
23
|
+
|
24
|
+
#len(n) Filter paths by length
|
25
|
+
n: Number | Range
|
26
|
+
|
27
|
+
HELP
|
28
|
+
else
|
29
|
+
super
|
30
|
+
end
|
31
|
+
description
|
32
|
+
end
|
33
|
+
|
34
|
+
def lengths
|
35
|
+
map(element_type: :integer) { |s| s.length }
|
36
|
+
end
|
37
|
+
|
38
|
+
# This could be done more efficiently by reimplementing
|
39
|
+
# transpose... Right now it needs 2n memory.
|
40
|
+
def transpose
|
41
|
+
gather { [] }.
|
42
|
+
map(element_type: :array) { |a| a.transpose }.
|
43
|
+
scatter(element_type: :array)
|
44
|
+
end
|
45
|
+
|
46
|
+
def compacted
|
47
|
+
map element_type: element_type, route_name: 'compact' do |a|
|
48
|
+
a.compact
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def heads(et = nil)
|
53
|
+
map element_type: et, route_name: 'heads' do |a|
|
54
|
+
a.first
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def tails(et = nil)
|
59
|
+
map element_type: et, route_name: 'tails' do |a|
|
60
|
+
a.last
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def pairs(head = 0, tail = -1)
|
65
|
+
map element_type: element_type, route_name: "pairs[#{ head },#{ tail }]" do |a|
|
66
|
+
[a[head], a[tail]]
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def len(n)
|
71
|
+
select do |path|
|
72
|
+
n === path.length
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
@@ -4,9 +4,9 @@ module Pacer::Core::Graph
|
|
4
4
|
case section
|
5
5
|
when :paths
|
6
6
|
puts <<HELP
|
7
|
-
The following path helper methods are available:
|
7
|
+
The following path-specific route methods are available:
|
8
8
|
|
9
|
-
|
9
|
+
See also the :arrays section for more available methods
|
10
10
|
|
11
11
|
#subgraph(target_graph, opts) Add each element in the path to the graph
|
12
12
|
target_graph: PacerGraph (optional) if not specified creates a new TG.
|
@@ -18,19 +18,6 @@ The following path helper methods are available:
|
|
18
18
|
ignore_missing_vertices: Boolean Squelches the above mentioned exception
|
19
19
|
show_missing_vertices: Boolean Complain about missing vertices
|
20
20
|
|
21
|
-
#compact_paths Removes nils from paths
|
22
|
-
|
23
|
-
#heads Route to only the first element from each path
|
24
|
-
|
25
|
-
#tails Route to only the last element from each path
|
26
|
-
|
27
|
-
#pairs(head, tail) Route to a mini path of only the first and last elements
|
28
|
-
head: Number Array index of the : first : element in the pair
|
29
|
-
tail: Number : second :
|
30
|
-
|
31
|
-
#len(n) Filter paths by length
|
32
|
-
n: Number | Range
|
33
|
-
|
34
21
|
#hashify Make a hash of the properties and relationships of the path
|
35
22
|
This is just a simple view on the data to facilitate analysis
|
36
23
|
|
@@ -41,10 +28,6 @@ HELP
|
|
41
28
|
description
|
42
29
|
end
|
43
30
|
|
44
|
-
def transpose
|
45
|
-
collect { |arraylist| arraylist.to_a }.transpose
|
46
|
-
end
|
47
|
-
|
48
31
|
def subgraph(target_graph = nil, opts = {})
|
49
32
|
raise "Can't create a subgraph within itself." if target_graph == graph
|
50
33
|
target_graph ||= Pacer.tg
|
@@ -82,34 +65,16 @@ HELP
|
|
82
65
|
end
|
83
66
|
end
|
84
67
|
|
85
|
-
def compact_paths
|
86
|
-
map
|
87
|
-
path.compact
|
88
|
-
end
|
68
|
+
def transpose
|
69
|
+
map(element_type: :array, &:to_a).transpose
|
89
70
|
end
|
90
71
|
|
91
72
|
def heads(et = :vertex)
|
92
|
-
|
93
|
-
path.first
|
94
|
-
end
|
73
|
+
super et
|
95
74
|
end
|
96
75
|
|
97
76
|
def tails(et = :vertex)
|
98
|
-
|
99
|
-
path.last
|
100
|
-
end
|
101
|
-
end
|
102
|
-
|
103
|
-
def pairs(head = 0, tail = -1)
|
104
|
-
map element_type: :path, route_name: "pairs[#{ head },#{ tail }]" do |path|
|
105
|
-
[path[head], path[tail]]
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
def len(n)
|
110
|
-
select do |path|
|
111
|
-
n === path.length
|
112
|
-
end
|
77
|
+
super et
|
113
78
|
end
|
114
79
|
|
115
80
|
def hashify
|
data/lib/pacer/loader.rb
CHANGED
@@ -19,6 +19,9 @@ require 'pacer/exceptions'
|
|
19
19
|
require 'pacer/pipes'
|
20
20
|
|
21
21
|
require 'pacer/core/route'
|
22
|
+
require 'pacer/core/string_route'
|
23
|
+
require 'pacer/core/array_route'
|
24
|
+
require 'pacer/core/hash_route'
|
22
25
|
require 'pacer/core/graph'
|
23
26
|
require 'pacer/core/side_effect'
|
24
27
|
|
@@ -83,6 +86,7 @@ require 'pacer/transform/stream_uniq'
|
|
83
86
|
require 'pacer/transform/gather'
|
84
87
|
require 'pacer/transform/map'
|
85
88
|
require 'pacer/transform/flat_map'
|
89
|
+
require 'pacer/transform/reduce'
|
86
90
|
require 'pacer/transform/make_pairs'
|
87
91
|
require 'pacer/transform/process'
|
88
92
|
require 'pacer/transform/join'
|
data/lib/pacer/route_builder.rb
CHANGED
@@ -25,7 +25,10 @@ module Pacer
|
|
25
25
|
element_types[:vertex] = [Pacer::Core::Graph::ElementRoute, Pacer::Core::Graph::VerticesRoute]
|
26
26
|
element_types[:edge] = [Pacer::Core::Graph::ElementRoute, Pacer::Core::Graph::EdgesRoute]
|
27
27
|
element_types[:mixed] = [Pacer::Core::Graph::ElementRoute, Pacer::Core::Graph::MixedRoute]
|
28
|
-
element_types[:path] = [Pacer::Core::Graph::PathRoute]
|
28
|
+
element_types[:path] = [Pacer::Core::ArrayRoute, Pacer::Core::Graph::PathRoute]
|
29
|
+
element_types[:string] = [Pacer::Core::StringRoute]
|
30
|
+
element_types[:array] = [Pacer::Core::ArrayRoute]
|
31
|
+
element_types[:hash] = [Pacer::Core::HashRoute]
|
29
32
|
end
|
30
33
|
|
31
34
|
def chain(source, args)
|
@@ -1,9 +1,10 @@
|
|
1
1
|
module Pacer
|
2
2
|
module Routes::RouteOperations
|
3
|
-
def aggregate(into = nil)
|
3
|
+
def aggregate(into = nil, &block)
|
4
4
|
aggregate = ::Pacer::SideEffect::Aggregate
|
5
5
|
r = self
|
6
6
|
r = section(into, aggregate::ElementSet) if into.is_a? Symbol
|
7
|
+
into = block if block
|
7
8
|
r.chain_route :side_effect => aggregate, :into => into
|
8
9
|
end
|
9
10
|
end
|
@@ -25,13 +26,16 @@ module Pacer
|
|
25
26
|
protected
|
26
27
|
|
27
28
|
def attach_pipe(end_pipe)
|
28
|
-
|
29
|
+
case into
|
30
|
+
when Symbol
|
29
31
|
hs = vars[into] = HashSet.new
|
30
32
|
pipe = AggregatePipe.new hs
|
31
|
-
|
32
|
-
pipe = AggregatePipe.new into
|
33
|
-
|
33
|
+
when Proc
|
34
|
+
pipe = AggregatePipe.new into.call(self)
|
35
|
+
when nil
|
34
36
|
pipe = AggregatePipe.new HashSet.new
|
37
|
+
else
|
38
|
+
pipe = AggregatePipe.new into
|
35
39
|
end
|
36
40
|
pipe.setStarts end_pipe if end_pipe
|
37
41
|
pipe
|
data/lib/pacer/transform/cap.rb
CHANGED
@@ -0,0 +1,165 @@
|
|
1
|
+
module Pacer
|
2
|
+
module Routes
|
3
|
+
module RouteOperations
|
4
|
+
def reducer(opts = {}, &block)
|
5
|
+
chain_route({transform: :reduce, reduce: block}.merge(opts))
|
6
|
+
end
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
module Transform
|
11
|
+
module Reduce
|
12
|
+
# The goal is to break down the xml stream from being a black
|
13
|
+
# box iterator to doing the job in a few steps:
|
14
|
+
|
15
|
+
def help(section = nil)
|
16
|
+
case section
|
17
|
+
when nil
|
18
|
+
puts <<HELP
|
19
|
+
|
20
|
+
HELP
|
21
|
+
when :example
|
22
|
+
puts <<HELP
|
23
|
+
This example usage is from pacer-xml plugin v0.2. I transform a raw
|
24
|
+
stream of lines from a 79MB file that contains > 4000 concatenated xml
|
25
|
+
documents averaging 600 lines each. to a stream of imported nodes:
|
26
|
+
|
27
|
+
First, a little setup: create a graph, open the file and make a route of
|
28
|
+
its lines
|
29
|
+
|
30
|
+
graph = Pacer.tg
|
31
|
+
f = File.open '/tmp/ipgb20120103.xml'
|
32
|
+
lines = f.each_line.to_route(element_type: :string).route
|
33
|
+
|
34
|
+
Create a simple reducer that delimits sections when it hits a DTD tag
|
35
|
+
and when it gets to the end of the file (that's the s.nil?). and reduces
|
36
|
+
the stream by pushing each section's lines into an array. When a section
|
37
|
+
is entered, the initial value is provided by the return value of the
|
38
|
+
enter block.
|
39
|
+
|
40
|
+
reducer = lines.reducer(element_type: :array).route
|
41
|
+
reducer.enter { |s| [] if s =~ /<\?xml/ }
|
42
|
+
reducer.reduce { |s, lines| lines << s }
|
43
|
+
reducer.leave { |s, lines| s.nil? or s =~ /<\?xml/ }
|
44
|
+
|
45
|
+
Now we're back in the territory of fairly vanilla routes. We join each
|
46
|
+
section, use the pacer-xml gem's StringRoute#xml method to parse the XML
|
47
|
+
with Nokogiri and then its XmlRoute#import method to turn those XML
|
48
|
+
nodes into graph elements.
|
49
|
+
|
50
|
+
vertex = reducer.map(element_type: :string, &:join).xml.limit(1).import(graph).first
|
51
|
+
|
52
|
+
graph #=> #<PacerGraph tinkergraph[vertices:88 edges:90]
|
53
|
+
vertex #=> #<V[0] us-patent-grant>
|
54
|
+
|
55
|
+
We can see that we've now got a graph with 88 vertices and 90 edges.
|
56
|
+
|
57
|
+
HELP
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
attr_writer :enter, :reduce, :leave
|
62
|
+
|
63
|
+
def enter(&block)
|
64
|
+
if block
|
65
|
+
@enter = block
|
66
|
+
end
|
67
|
+
self
|
68
|
+
end
|
69
|
+
|
70
|
+
def reduce(&block)
|
71
|
+
if block
|
72
|
+
@reduce = block
|
73
|
+
end
|
74
|
+
self
|
75
|
+
end
|
76
|
+
|
77
|
+
def leave(same_as = nil, &block)
|
78
|
+
if same_as == :enter
|
79
|
+
@leave = @enter
|
80
|
+
elsif block
|
81
|
+
@leave = block
|
82
|
+
end
|
83
|
+
self
|
84
|
+
end
|
85
|
+
|
86
|
+
def attach_pipe(end_pipe)
|
87
|
+
if @enter and @reduce and @leave
|
88
|
+
pipe = ReducerPipe.new self, @enter, @reduce, @leave
|
89
|
+
pipe.setStarts end_pipe
|
90
|
+
pipe
|
91
|
+
else
|
92
|
+
fail Pacer::ClientError, 'enter, reduce, and leave must all be specified for reducers'
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
class ReducerPipe < Pacer::Pipes::RubyPipe
|
97
|
+
attr_reader :enter, :reduce, :leave, :change_value
|
98
|
+
attr_accessor :changed_value, :value_changed
|
99
|
+
attr_accessor :next_value
|
100
|
+
|
101
|
+
def initialize(back, enter, reduce, leave)
|
102
|
+
super()
|
103
|
+
@change_value = proc do |new_value|
|
104
|
+
self.changed_value = new_value
|
105
|
+
self.value_changed = true
|
106
|
+
end
|
107
|
+
@enter = Pacer::Wrappers::WrappingPipeFunction.new back, enter
|
108
|
+
@reduce = Pacer::Wrappers::WrappingPipeFunction.new back, reduce
|
109
|
+
@leave = Pacer::Wrappers::WrappingPipeFunction.new back, leave
|
110
|
+
@next_value = nil
|
111
|
+
end
|
112
|
+
|
113
|
+
def processNextStart
|
114
|
+
if next_value
|
115
|
+
collecting = true
|
116
|
+
value = next_value
|
117
|
+
self.next_value = nil
|
118
|
+
else
|
119
|
+
collecting = false
|
120
|
+
end
|
121
|
+
leaving = false
|
122
|
+
final_value = nil
|
123
|
+
while raw_element = starts.next
|
124
|
+
if collecting
|
125
|
+
if leave.call_with_args(raw_element, value, change_value)
|
126
|
+
leaving = true
|
127
|
+
return_value = final_value(value)
|
128
|
+
collecting = false
|
129
|
+
else
|
130
|
+
value = reduce.call_with_args(raw_element, value)
|
131
|
+
end
|
132
|
+
end
|
133
|
+
if not collecting
|
134
|
+
value = enter.call raw_element
|
135
|
+
if value
|
136
|
+
collecting = true
|
137
|
+
value = reduce.call_with_args(raw_element, value)
|
138
|
+
end
|
139
|
+
end
|
140
|
+
if leaving
|
141
|
+
self.next_value = value if collecting
|
142
|
+
return return_value
|
143
|
+
end
|
144
|
+
end
|
145
|
+
rescue Pacer::EmptyPipe, java.util.NoSuchElementException
|
146
|
+
if collecting and leave.call_with_args(nil, value, change_value)
|
147
|
+
return final_value(value)
|
148
|
+
end
|
149
|
+
raise EmptyPipe.instance
|
150
|
+
end
|
151
|
+
|
152
|
+
private
|
153
|
+
|
154
|
+
def final_value(value)
|
155
|
+
if value_changed
|
156
|
+
self.value_changed = false
|
157
|
+
value = changed_value
|
158
|
+
self.changed_value = nil
|
159
|
+
end
|
160
|
+
value
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
164
|
+
end
|
165
|
+
end
|
data/lib/pacer/version.rb
CHANGED
Binary file
|
data/pom.xml
CHANGED
@@ -7,7 +7,7 @@
|
|
7
7
|
<artifactId>pacer</artifactId>
|
8
8
|
<!-- NOTE: the following properties are automatically updated based on the values in lib/pacer-neo4j/version.rb -->
|
9
9
|
<properties>
|
10
|
-
<gem.version>1.1.0</gem.version>
|
10
|
+
<gem.version>1.1.1</gem.version>
|
11
11
|
<blueprints.version>2.1.0</blueprints.version>
|
12
12
|
<pipes.version>2.1.0</pipes.version>
|
13
13
|
<gremlin.version>2.1.0</gremlin.version>
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: pacer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 1.1.0
|
5
|
+
version: 1.1.1
|
6
6
|
platform: java
|
7
7
|
authors:
|
8
8
|
- Darrick Wiebe
|
@@ -40,6 +40,7 @@ files:
|
|
40
40
|
- lib/pacer/blueprints/payload_elements.rb
|
41
41
|
- lib/pacer/blueprints/ruby_graph.rb
|
42
42
|
- lib/pacer/blueprints/tg.rb
|
43
|
+
- lib/pacer/core/array_route.rb
|
43
44
|
- lib/pacer/core/graph.rb
|
44
45
|
- lib/pacer/core/graph/edges_route.rb
|
45
46
|
- lib/pacer/core/graph/element_route.rb
|
@@ -48,8 +49,10 @@ files:
|
|
48
49
|
- lib/pacer/core/graph/mixed_route.rb
|
49
50
|
- lib/pacer/core/graph/path_route.rb
|
50
51
|
- lib/pacer/core/graph/vertices_route.rb
|
52
|
+
- lib/pacer/core/hash_route.rb
|
51
53
|
- lib/pacer/core/route.rb
|
52
54
|
- lib/pacer/core/side_effect.rb
|
55
|
+
- lib/pacer/core/string_route.rb
|
53
56
|
- lib/pacer/exceptions.rb
|
54
57
|
- lib/pacer/filter/block_filter.rb
|
55
58
|
- lib/pacer/filter/collection_filter.rb
|
@@ -132,6 +135,7 @@ files:
|
|
132
135
|
- lib/pacer/transform/path_tree.rb
|
133
136
|
- lib/pacer/transform/payload.rb
|
134
137
|
- lib/pacer/transform/process.rb
|
138
|
+
- lib/pacer/transform/reduce.rb
|
135
139
|
- lib/pacer/transform/scatter.rb
|
136
140
|
- lib/pacer/transform/sort_section.rb
|
137
141
|
- lib/pacer/transform/stream_sort.rb
|
@@ -212,7 +216,7 @@ files:
|
|
212
216
|
- spec/support/use_transactions.rb
|
213
217
|
- spec/tackle/simple_mixin.rb
|
214
218
|
- spec/tackle/tinkerpop_graph_mixins.rb
|
215
|
-
- lib/pacer-1.1.0-standalone.jar
|
219
|
+
- lib/pacer-1.1.1-standalone.jar
|
216
220
|
homepage: http://github.com/pangloss/pacer
|
217
221
|
licenses:
|
218
222
|
- MIT
|