json_stream_trigger 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 11dbed3cd388bb5830c6956a4792bafb172d249a
4
+ data.tar.gz: 73ff8ca8f6301a079c9ebdddb531b292e683e641
5
+ SHA512:
6
+ metadata.gz: 78a5fcc5b1a352fe79cbfbc0218d5d5f41ae3d34a83a69eb4c0cc85c94fb6004e440d0705c90de0822591b1fc6eca00ee12cf59a082386f4517a99882cc9e23b
7
+ data.tar.gz: ffbcc34b8cdb0564293d548368f13b233804819d691746ca7266b3c84759cae38b1f75e17572f5e9d6e028bd219553ef4744c2e9ec85bd6f1e2b406f9cfe3d39
data/README.md ADDED
@@ -0,0 +1,76 @@
1
+ # Ruby `json_stream_trigger` Gem
2
+
3
+ Instead of parsing a huge JSON file and loading it into memory,
4
+ this library will stream the bytes through
5
+ [json-stream](https://github.com/dgraham/json-stream) and only
6
+ create a small buffer for objects whose JSONPath matches a pattern you specify.
7
+ When the object is completed, the specified block will be called.
8
+
9
+ Install with `gem "json_stream_trigger"` in your Gemfile.
10
+
11
+ ## Example:
12
+
13
+ ```ruby
14
+ f = File.open('really_big_file.json')
15
+ stream = JsonStreamTrigger.new()
16
+
17
+ # Match each array item. If you want the whole array, use $.data
18
+ stream.on('$.data[*]') do |json_string|
19
+ import JSON.parse(json_string, :quirks_mode => true)
20
+ end
21
+
22
+ # Will match for $.any.sub[*].item.meta
23
+ stream.on('$..meta') do |json_string|
24
+ save_meta JSON.parse(json_string, :quirks_mode => true)
25
+ end
26
+
27
+ # read in 1 KB chunks
28
+ while chunk = f.read(1024)
29
+ stream << chunk
30
+ end
31
+
32
+ ```
33
+
34
+ The captured JSON string buffer will be passed to the block. Note, Ruby's JSON library expects
35
+ JSON documents to be passed to it - not primitives - this is why `:quirks_mode => true` has been added.
36
+
37
+ ## Path Details
38
+ The JSONPaths are similar to XPath notation. `$` is the root,
39
+ single wild card keys can be done with `$.*.version`,
40
+ or you can do a multi-level wildcard with `$.docs..name`.
41
+ [More info on JSONPath](http://goessner.net/articles/JsonPath/)
42
+
43
+ A few more examples:
44
+
45
+ ```javascript
46
+ {
47
+ meta: {version: 0.1},
48
+ docs: [
49
+ {id: 1},
50
+ {id: 2},
51
+ {id: 3},
52
+ {id: 4},
53
+ {
54
+ id: 5,
55
+ user: {
56
+ name: "Tyler"
57
+ }
58
+ }
59
+ ]
60
+ }
61
+ ```
62
+
63
+ ```ruby
64
+ on('$.docs[*].id') # triggers for id property of every item in docs array
65
+ on('$.docs') # returns full array of items
66
+ on('$.docs[*]') # triggers for each item in the array
67
+ on('$.docs[1].id') # returns value of ID 1
68
+ on('$.docs[*].*.name') # returns 'Tyler'
69
+ on('$..name') # matches any value whose key is 'name'
70
+ ```
71
+
72
+
73
+
74
+ ## Tests
75
+ `rake test`
76
+
data/lib/json_path.rb ADDED
@@ -0,0 +1,22 @@
1
+
2
# Translates a small subset of JSONPath into regular expressions and tests
# concrete key paths such as "$.docs[3].user.name" against them.
#
# Supported pattern syntax:
#   $      the root; a leading "$" anchors the match to the start of the path
#   .key   a literal key (every character is matched literally)
#   [n]    a literal array index
#   [*]    any array index
#   .*     any single key
#   ..     any number of intermediate keys and/or array indexes
class JsonPath
  # Splits a pattern into the multi-character operators and single literal
  # characters. ".*" is only an operator when it spans a whole segment, i.e.
  # when it is followed by ".", "[" or the end of the pattern.
  TOKENS = /\[\*\]|\.\.|\.\*(?=[.\[]|\z)|./m

  ANY_INDEX   = '\[\d+\]'.freeze
  ANY_KEY     = '\.[^.\[\]]+'.freeze
  # Optional run of intermediate segments; it has to begin with a segment
  # separator so "$.docs..x" can not match a longer key such as "$.docsy.x".
  DESCENDANTS = '(?:[.\[].*)?'.freeze

  # Tells whether +path+ is matched by the JSONPath +pattern+.
  #
  # path    - String key path, e.g. "$.docs[1].id".
  # pattern - String JSONPath pattern, e.g. "$.docs[*].id".
  #
  # Returns true or false.
  def self.matches?(path, pattern)
    # Patterns are matched once per parser event, so each one is compiled
    # only the first time it is seen.
    @compiled ||= {}
    re = (@compiled[pattern] ||= convert_to_re(pattern))
    !re.match(path).nil?
  end

  # Builds the regular expression equivalent to +pattern+.
  #
  # The expression always has to match up to the end of the path; it is
  # anchored to the start of the path only when the pattern begins with "$".
  #
  # Returns a Regexp.
  def self.convert_to_re(pattern)
    source = pattern.gsub(TOKENS) do |token|
      case token
      when '[*]' then ANY_INDEX
      when '.*'  then ANY_KEY
      when '..'
        # ".." also stands for the separator in front of the next key; an
        # index that follows directly ("..[*]") has no such separator.
        index_follows = Regexp.last_match.post_match.start_with?('[')
        index_follows ? DESCENDANTS : DESCENDANTS + '\.'
      else
        # Everything else, including "." and "$", is a literal character.
        Regexp.escape(token)
      end
    end

    anchor = pattern.start_with?('$') ? '\A' : ''
    Regexp.new(anchor + source + '\z')
  end
end
22
+
@@ -0,0 +1,188 @@
1
+ require 'json'
2
+ require 'json/stream'
3
+ require_relative 'json_path'
4
+
5
# Feeds JSON text through a JSON::Stream::Parser and, for every value whose
# path matches a registered JSONPath pattern, hands the JSON text of that
# value to the block registered for the pattern.
#
#   stream = JsonStreamTrigger.new
#   stream.on('$.docs[*]') { |json| handle JSON.parse(json) }
#   stream << chunk
class JsonStreamTrigger
  attr_reader :key_path, :triggers, :full_buffer

  DEBUG = false

  # Parser callbacks that are wired to the handler methods of the same name.
  PARSER_EVENTS = [:start_object, :end_object, :start_array, :end_array, :key, :value].freeze

  # Buffer endings after which a separating comma must not be written.
  NO_COMMA_AFTER = ['[', '{', ':'].freeze

  # One open object or array.
  #   kind        - :object or :array
  #   base_length - length of @key_path when it names the container itself
  #   index       - number of elements seen so far (arrays only, 1-based)
  Scope = Struct.new(:kind, :base_length, :index)

  def initialize()
    @parser = JSON::Stream::Parser.new
    PARSER_EVENTS.each { |event| @parser.send(event, &method(event)) }

    @key_path = ''.dup
    @scopes = []
    @triggers = {}
    @active_buffers = {}
    @full_buffer = ''.dup
  end

  # Registers +block+ to be called with the JSON text of every value whose
  # path matches +pattern+. A later registration for the same pattern
  # replaces the earlier one.
  #
  # Raises ArgumentError when no block is given.
  def on(pattern, &block)
    raise ArgumentError, 'a block is required' unless block
    @triggers[pattern] = block
  end

  # Pushes the next chunk of JSON text into the parser.
  #
  # Returns self so calls can be chained.
  def <<(bytes)
    debug { "bytes: #{bytes.inspect}" }
    @parser << bytes
    @full_buffer << bytes if DEBUG
    self
  end

  # Tells whether the path of the value currently being parsed matches +pattern+.
  def path_matches?(pattern)
    JsonPath.matches?(@key_path, pattern)
  end

  protected

  ################################ PARSING TRIGGERS ###########################

  def start_object
    debug { 'start object' }
    enter_value
    append_buffers ',{'
    @scopes.push(Scope.new(:object, @key_path.length, 0))
  end

  def end_object
    debug { 'end object' }
    leave_scope # drops the last key so the path names the object again
    append_buffers '}'
    trigger_block_for_matching
  end

  def start_array
    debug { 'start array' }
    enter_value
    append_buffers ',['
    @scopes.push(Scope.new(:array, @key_path.length, 0))
    @key_path << '[]' # no element seen yet
  end

  def end_array
    debug { 'end array' }
    leave_scope # drops the last index so the path names the array again
    append_buffers ']'
    trigger_block_for_matching
  end

  def key(k)
    debug { "new key '#{k}'" }
    scope = @scopes.last
    @key_path.slice!(scope.base_length..-1) if scope # drop the previous sibling's key
    @key_path << ".#{k}"
    # The key is written before new buffers are activated: a buffer that
    # starts at this key holds the value only, not the key itself.
    append_buffers ",#{JSON.dump(k)}:"
    activate_buffers_for_matching
  end

  def value(v)
    debug { "value '#{v}'" }
    enter_value
    append_buffers ",#{JSON.dump(v)}"
    trigger_block_for_matching
  end

  ################################ BUFFER STUFF ###########################

  # Starts an empty buffer for every pattern that matches the current path
  # and is not being buffered already.
  def activate_buffers_for_matching
    @triggers.each_key do |pattern|
      next if @active_buffers.key?(pattern)
      debug { "checking #{@key_path} matches #{pattern}" }
      next unless JsonPath.matches?(@key_path, pattern)

      debug { ">> Activating buffer for #{pattern.inspect}" }
      @active_buffers[pattern] = ''.dup
    end
  end

  # Called when the value at the current path is complete: every active
  # pattern matching the path gets its block called with the buffered JSON
  # text. The buffer is dropped afterwards; a later value matching the same
  # pattern (e.g. the next array item) starts a fresh one.
  def trigger_block_for_matching
    @active_buffers.keys.each do |pattern|
      next unless JsonPath.matches?(@key_path, pattern)

      debug { "<< Calling trigger for '#{pattern}'" }
      @triggers[pattern].call(@active_buffers.delete(pattern))
    end
  end

  # Appends +bytes+ to every active buffer. A leading comma is left out for
  # buffers that are empty or end in "[", "{" or ":". That decision is made
  # per buffer, so one buffer can not affect what another one receives.
  def append_buffers(bytes)
    leading_comma = bytes.start_with?(',')
    @active_buffers.each do |pattern, buffer|
      skip_comma = leading_comma &&
                   (buffer.empty? || NO_COMMA_AFTER.include?(buffer[-1, 1]))
      buffer << (skip_comma ? bytes[1..-1] : bytes)
      debug { "Appended to #{pattern} => '#{bytes}' to buffer '#{buffer}'" }
    end
  end

  ############################### PATH STUFF #######################

  # Points @key_path at the value that is about to start.
  #
  # * Top level: the path becomes "$".
  # * Inside an array: the path becomes "<array>[n]", n counting from 1.
  # * Inside an object: #key has already set the path and activated buffers.
  def enter_value
    scope = @scopes.last
    if scope.nil?
      @key_path.replace('$')
    elsif scope.kind == :array
      scope.index += 1
      @key_path.slice!(scope.base_length..-1)
      @key_path << "[#{scope.index}]"
      debug { " new array i = #{scope.index}" }
    else
      return
    end
    activate_buffers_for_matching
  end

  # Closes the innermost object/array and cuts @key_path back to the path of
  # that container. Cutting by length keeps this correct for keys that
  # contain ".", "[" or "]".
  def leave_scope
    scope = @scopes.pop
    @key_path.slice!(scope.base_length..-1) if scope
  end

  # Prints +msg+ (or the result of the block) together with the current path
  # when DEBUG is on. Passing a block keeps the message from being built when
  # debugging is off.
  def debug(msg = nil)
    return unless DEBUG
    msg = yield if block_given?
    puts msg.ljust(60) + "PATH: #{@key_path}"
  end

end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: json_stream_trigger
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ platform: ruby
6
+ authors:
7
+ - Tyler Roberts
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-01-12 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: ''
14
+ email: code@polar-concepts.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - README.md
20
+ - lib/json_path.rb
21
+ - lib/json_stream_trigger.rb
22
+ homepage: http://github.com/bdevel/json_stream_trigger
23
+ licenses:
24
+ - MIT
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - '>='
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 2.2.2
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: Parse large JSON files as a stream and trigger events upon key matching.
46
+ test_files: []
47
+ has_rdoc: