json_stream_trigger 0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 11dbed3cd388bb5830c6956a4792bafb172d249a
4
+ data.tar.gz: 73ff8ca8f6301a079c9ebdddb531b292e683e641
5
+ SHA512:
6
+ metadata.gz: 78a5fcc5b1a352fe79cbfbc0218d5d5f41ae3d34a83a69eb4c0cc85c94fb6004e440d0705c90de0822591b1fc6eca00ee12cf59a082386f4517a99882cc9e23b
7
+ data.tar.gz: ffbcc34b8cdb0564293d548368f13b233804819d691746ca7266b3c84759cae38b1f75e17572f5e9d6e028bd219553ef4744c2e9ec85bd6f1e2b406f9cfe3d39
data/README.md ADDED
@@ -0,0 +1,76 @@
1
+ # Ruby `json_stream_trigger` Gem
2
+
3
+ Instead of parsing a huge JSON file and loading it all into memory,
4
+ this library will stream the bytes through
5
+ [json-stream](https://github.com/dgraham/json-stream) and only
6
+ create a small buffer for objects whose JSONPath matches a pattern you specify.
7
+ When the object is completed, the specified block will be called.
8
+
9
+ Install with `gem "json_stream_trigger"` in your Gemfile.
10
+
11
+ ## Example:
12
+
13
+ ```ruby
14
+ f = File.open('really_big_file.json')
15
+ stream = JsonStreamTrigger.new()
16
+
17
+ # Match each array item. If you want the whole array, use $.data
18
+ stream.on('$.data[*]') do |json_string|
19
+ import JSON.parse(json_string, :quirks_mode => true)
20
+ end
21
+
22
+ # Will match for $.any.sub[*].item.meta
23
+ stream.on('$..meta') do |json_string|
24
+ save_meta JSON.parse(json_string, :quirks_mode => true)
25
+ end
26
+
27
+ # read in 1KB chunks
28
+ while chunk = f.read(1024)
29
+ stream << chunk
30
+ end
31
+
32
+ ```
33
+
34
+ The captured JSON string buffer will be passed to the block. Note, Ruby's JSON library expects
35
+ JSON documents to be passed to it - not primitives - this is why `:quirks_mode => true` has been added.
36
+
37
+ ## Path Details
38
+ The JSONPaths are similar to XPath notation. `$` is the root,
39
+ single wild card keys can be done with `$.*.version`,
40
+ or you can do a multi-level wildcard with `$.docs..name`.
41
+ [More info on JSONPath](http://goessner.net/articles/JsonPath/)
42
+
43
+ A few more examples:
44
+
45
+ ```javascript
46
+ {
47
+ meta: {version: 0.1},
48
+ docs: [
49
+ {id: 1},
50
+ {id: 2},
51
+ {id: 3},
52
+ {id: 4},
53
+ {
54
+ id: 5,
55
+ user: {
56
+ name: "Tyler"
57
+ }
58
+ }
59
+ ]
60
+ }
61
+ ```
62
+
63
+ ```ruby
64
+ on('$.docs[*].id') # triggers for id property of every item in docs array
65
+ on('$.docs') # returns full array of items
66
+ on('$.docs[*]') # triggers for each item in the array
67
+ on('$.docs[1].id') # returns value of ID 1
68
+ on('$.docs[*].*.name') # returns 'Tyler'
69
+ on('$..name') # matches any value whose key is 'name'
70
+ ```
71
+
72
+
73
+
74
+ ## Tests
75
+ `rake test`
76
+
data/lib/json_path.rb ADDED
@@ -0,0 +1,22 @@
1
+
2
# Translates a small JSONPath dialect into regular expressions and tests key
# paths (such as "$.docs[2].user.name") against them.
#
# Supported pattern syntax:
#   $      root; a leading "$" anchors the match to the start of the path
#   .key   literal object key (regex metacharacters are matched literally)
#   [n]    literal array index
#   [*]    any array index
#   .*     any single object key
#   ..     any number of intermediate segments, including none
#
# A pattern always has to match up to the very end of the path. A pattern that
# does not start with "$" is not anchored at the start, so it matches any path
# ending in the given segments.
class JsonPath
  # One alternative per pattern token; alternatives are tried left to right.
  TOKEN = /
      (?<root>\A\$)
    | (?<any_index>\[\*\])
    | (?<descend>\.\.(?=\[)|\.(?=\.))
    | (?<any_key>\.\*(?=[.\[]|\z))
    | (?<literal>.[^.\[\]]*)
  /mx

  # Tells whether +path+ satisfies the JSONPath +pattern+.
  #
  # @param path [String] concrete key path, e.g. "$.docs[1].id"
  # @param pattern [String] JSONPath pattern, e.g. "$.docs[*].id"
  # @return [Boolean]
  def self.matches?(path, pattern)
    # Patterns are few and reused for every parser event, so compile each once.
    @compiled ||= {}
    re = (@compiled[pattern] ||= convert_to_re(pattern))
    !(re =~ path).nil?
  end

  # Builds the regular expression equivalent to +pattern+.
  #
  # @param pattern [String] JSONPath pattern
  # @return [Regexp]
  def self.convert_to_re(pattern)
    source = pattern.gsub(TOKEN) do
      token = Regexp.last_match
      if token[:root]
        '\A\$'
      elsif token[:any_index]
        '\[\d+\]'
      elsif token[:descend]
        # Either nothing, or anything that begins on a segment boundary, so
        # "$.docs..name" cannot match a sibling key such as "$.docsfoo.name".
        '(?:[.\[].*)?'
      elsif token[:any_key]
        '\.[^.\[\]]+'
      else
        # Literal text (segment dots, key names, explicit indexes) must never
        # be interpreted as regex syntax.
        Regexp.escape(token[0])
      end
    end

    # \z instead of $ so that a line break inside a key cannot end the match.
    Regexp.new("#{source}\\z", Regexp::MULTILINE)
  end
end
22
+
@@ -0,0 +1,188 @@
1
+ require 'json'
2
+ require 'json/stream'
3
+ require_relative 'json_path'
4
+
5
# Feeds JSON text through JSON::Stream::Parser and, for every value whose key
# path matches a registered JSONPath pattern, re-serializes just that value and
# passes the resulting JSON string to the pattern's block.
#
# Key paths look like "$.docs[2].user.name"; array indexes start at 1.
#
#   stream = JsonStreamTrigger.new
#   stream.on('$.data[*]') { |json| handle(JSON.parse(json)) }
#   stream << chunk
class JsonStreamTrigger
  attr_reader :key_path, :triggers, :full_buffer
  DEBUG = false

  # A buffer is empty or ends in one of these right before the first element
  # of a container or right after a key, where no separating comma belongs.
  NO_COMMA_AFTER = ['[', '{', ':'].freeze

  # An open container: kind is :object or :array, key is the object key
  # currently being filled, index is the 1-based position of the current
  # array element (0 before the first element).
  Frame = Struct.new(:kind, :key, :index)

  # A value being recorded: the pattern it matched, the container depth at
  # which it started and the JSON text collected so far.
  Capture = Struct.new(:pattern, :depth, :buffer)

  private_constant :Frame, :Capture

  def initialize
    @parser = JSON::Stream::Parser.new

    @parser.start_object(&method(:start_object))
    @parser.end_object(&method(:end_object))
    @parser.start_array(&method(:start_array))
    @parser.end_array(&method(:end_array))
    @parser.key(&method(:key))
    @parser.value(&method(:value))

    @key_path = ''
    @triggers = {}
    @frames = []
    @captures = []
    @full_buffer = ''.dup
  end

  # Registers +block+ to be called with the JSON text of every value whose
  # path matches +pattern+. Registering the same pattern again replaces the
  # earlier block.
  #
  # @param pattern [String] JSONPath pattern, e.g. "$.data[*]"
  # @raise [ArgumentError] when no block is given
  def on(pattern, &block)
    raise ArgumentError, 'a block is required' unless block

    @triggers[pattern] = block
  end

  # Pushes the next chunk of JSON text into the parser.
  #
  # @param bytes [String]
  # @return [JsonStreamTrigger] self, so chunks can be chained with <<
  def <<(bytes)
    debug { "bytes: #{bytes.inspect}" }
    @parser << bytes
    @full_buffer << bytes if DEBUG
    self
  end

  # Tells whether the path of the value currently being parsed matches
  # +pattern+.
  def path_matches?(pattern)
    JsonPath.matches?(@key_path, pattern)
  end

  protected

  ################################ PARSING TRIGGERS ###########################

  def start_object
    debug { 'start object' }
    begin_value
    append_buffers ',{'
    @frames << Frame.new(:object, nil, 0)
  end

  def end_object
    debug { 'end object' }
    @frames.pop
    append_buffers '}'
    end_value
  end

  def start_array
    debug { 'start array' }
    begin_value
    append_buffers ',['
    @frames << Frame.new(:array, nil, 0)
  end

  def end_array
    debug { 'end array' }
    @frames.pop
    append_buffers ']'
    end_value
  end

  def key(k)
    debug { "new key '#{k}'" }
    @frames.last.key = k
    refresh_path
    # JSON.dump escapes quotes and control characters inside the key.
    append_buffers ",#{JSON.dump(k)}:"
  end

  def value(v)
    debug { "value '#{v}'" }
    begin_value
    append_buffers ",#{JSON.dump(v)}"
    end_value
  end

  ################################ BUFFER STUFF ###########################

  # Starts a capture for every pattern matching the value that begins at the
  # current path. The depth recorded here identifies the value when it ends.
  def activate_buffers_for_matching
    depth = @frames.size
    @triggers.each_key do |pattern|
      debug { "checking #{@key_path} matches #{pattern}" }
      next unless JsonPath.matches?(@key_path, pattern)

      debug { ">> Activating buffer for #{pattern.inspect}" }
      @captures << Capture.new(pattern, depth, ''.dup)
    end
  end

  # Delivers and discards every capture belonging to the value that has just
  # been completed. Captures of a value are always the most recent ones, since
  # captures of nested values were removed when those values ended.
  def trigger_block_for_matching
    depth = @frames.size
    finished = []
    finished << @captures.pop while !@captures.empty? && @captures.last.depth == depth

    # reverse_each restores the order in which the patterns were activated.
    finished.reverse_each do |capture|
      debug { "<< Calling trigger for '#{capture.pattern}'" }
      @triggers[capture.pattern].call(capture.buffer)
    end
  end

  # Appends +bytes+ to every open capture. A leading comma is dropped for a
  # capture that is at the start of a value or container; the decision is
  # made per capture because captures opened at different times differ.
  def append_buffers(bytes)
    @captures.each do |capture|
      buffer = capture.buffer
      chunk = bytes
      if bytes.start_with?(',') && (buffer.empty? || NO_COMMA_AFTER.include?(buffer[-1, 1]))
        chunk = bytes[1..-1]
      end
      buffer << chunk
      debug { "Appended to #{capture.pattern} => '#{chunk}' to buffer '#{buffer}'" }
    end
  end

  ############################### PATH STUFF #######################

  # Called when any value (object, array or primitive) starts: advances the
  # index of the enclosing array, if any, and opens matching captures.
  def begin_value
    parent = @frames.last
    parent.index += 1 if parent && parent.kind == :array
    refresh_path
    activate_buffers_for_matching
  end

  # Called when any value is complete; by now the frame of a finished
  # container has been popped, so the path is the value's own path.
  def end_value
    refresh_path
    trigger_block_for_matching
  end

  # Rebuilds @key_path from the open containers.
  def refresh_path
    @key_path = @frames.each_with_object('$'.dup) do |frame, path|
      if frame.kind == :array
        path << "[#{frame.index}]" if frame.index > 0
      elsif frame.key
        path << ".#{frame.key}"
      end
    end
  end

  # Prints a trace line when DEBUG is on. The message may be given as a block
  # so that it is only built when it is actually printed.
  def debug(msg = nil)
    return unless DEBUG

    msg ||= yield
    puts "#{msg.ljust(60)}PATH: #{@key_path}"
  end
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: json_stream_trigger
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ platform: ruby
6
+ authors:
7
+ - Tyler Roberts
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-01-12 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: ''
14
+ email: code@polar-concepts.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - README.md
20
+ - lib/json_path.rb
21
+ - lib/json_stream_trigger.rb
22
+ homepage: http://github.com/bdevel/json_stream_trigger
23
+ licenses:
24
+ - MIT
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - '>='
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 2.2.2
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: Parse large JSON files as a stream and trigger events upon key matching.
46
+ test_files: []
47
+ has_rdoc: