json_stream_trigger 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +76 -0
- data/lib/json_path.rb +22 -0
- data/lib/json_stream_trigger.rb +188 -0
- metadata +47 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 11dbed3cd388bb5830c6956a4792bafb172d249a
|
4
|
+
data.tar.gz: 73ff8ca8f6301a079c9ebdddb531b292e683e641
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 78a5fcc5b1a352fe79cbfbc0218d5d5f41ae3d34a83a69eb4c0cc85c94fb6004e440d0705c90de0822591b1fc6eca00ee12cf59a082386f4517a99882cc9e23b
|
7
|
+
data.tar.gz: ffbcc34b8cdb0564293d548368f13b233804819d691746ca7266b3c84759cae38b1f75e17572f5e9d6e028bd219553ef4744c2e9ec85bd6f1e2b406f9cfe3d39
|
data/README.md
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
# Ruby `json_stream_trigger` Gem
|
2
|
+
|
3
|
+
Instead of parsing a huge JSON file and loading it into memory,
|
4
|
+
this library will stream the bytes through
|
5
|
+
[json-stream](https://github.com/dgraham/json-stream) and only
|
6
|
+
create a small buffer for objects whose JSONPath matches a pattern you specify.
|
7
|
+
When the object is completed, the specified block will be called.
|
8
|
+
|
9
|
+
Install with `gem "json_stream_trigger"` in your Gemfile.
|
10
|
+
|
11
|
+
## Example:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
f = File.open('really_big_file.json')
|
15
|
+
stream = JsonStreamTrigger.new()
|
16
|
+
|
17
|
+
# Match each array item. If you want the whole array, use $.data
|
18
|
+
stream.on('$.data[*]') do |json_string|
|
19
|
+
import JSON.parse(json_string, :quirks_mode => true)
|
20
|
+
end
|
21
|
+
|
22
|
+
# Will match for $.any.sub[*].item.meta
|
23
|
+
stream.on('$..meta') do |json_string|
|
24
|
+
save_meta JSON.parse(json_string, :quirks_mode => true)
|
25
|
+
end
|
26
|
+
|
27
|
+
# read in 1KB chunks
|
28
|
+
while chunk = f.read(1024)
|
29
|
+
stream << chunk
|
30
|
+
end
|
31
|
+
|
32
|
+
```
|
33
|
+
|
34
|
+
The captured JSON string buffer will be passed to the block. Note that Ruby's JSON library expects
|
35
|
+
JSON documents to be passed to it - not primitives - this is why `:quirks_mode => true` has been added.
|
36
|
+
|
37
|
+
## Path Details
|
38
|
+
The JSONPaths are similar to XPath notation. `$` is the root,
|
39
|
+
single wild card keys can be done with `$.*.version`,
|
40
|
+
or you can do a multi-level wildcard with `$.docs..name`.
|
41
|
+
[More info on JSONPath](http://goessner.net/articles/JsonPath/)
|
42
|
+
|
43
|
+
A few more examples:
|
44
|
+
|
45
|
+
```javascript
|
46
|
+
{
|
47
|
+
meta: {version: 0.1},
|
48
|
+
docs: [
|
49
|
+
{id: 1},
|
50
|
+
{id: 2},
|
51
|
+
{id: 3},
|
52
|
+
{id: 4},
|
53
|
+
{
|
54
|
+
id: 5,
|
55
|
+
user: {
|
56
|
+
name: "Tyler"
|
57
|
+
}
|
58
|
+
}
|
59
|
+
]
|
60
|
+
}
|
61
|
+
```
|
62
|
+
|
63
|
+
```ruby
|
64
|
+
on('$.docs[*].id') # triggers for id property of every item in docs array
|
65
|
+
on('$.docs') # returns full array of items
|
66
|
+
on('$.docs[*]') # triggers for each item in the array
|
67
|
+
on('$.docs[1].id') # returns value of ID 1
|
68
|
+
on('$.docs[*].*.name') # returns 'Tyler'
|
69
|
+
on('$..name') # matches any value whose key is 'name'
|
70
|
+
```
|
71
|
+
|
72
|
+
|
73
|
+
|
74
|
+
## Tests
|
75
|
+
`rake test`
|
76
|
+
|
data/lib/json_path.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
# Matches concrete key paths (e.g. "$.docs[3].user.name") against a small
# subset of JSONPath patterns.
#
# Supported pattern syntax:
#   $      root; when the pattern starts with "$" the match is anchored at
#          the start of the path
#   [*]    any array index ("[12]")
#   ..     recursive descent: any number of intermediate segments
#   .*     any single object key (when followed by ".", "[" or the end of
#          the pattern)
# Every other character is matched literally.
class JsonPath
  # One alternative per special token; the final "." picks up any other single
  # character (including newlines, hence /m) so it can be escaped.
  TOKEN_RE = /\[\*\]|\.\.|\.\*(?=[.\[]|\z)|./m

  # Tells whether +path+ matches +pattern+.
  #
  # @param path [String] concrete path, e.g. "$.docs[1].id"
  # @param pattern [String] JSONPath-like pattern, e.g. "$.docs[*].id"
  # @return [Boolean]
  def self.matches?(path, pattern)
    # This is called for every parser event and every trigger, so compile each
    # pattern once and reuse the Regexp.
    @compiled ||= {}
    re = (@compiled[pattern] ||= convert_to_re(pattern))
    !re.match(path).nil?
  end

  # Translates +pattern+ into a Regexp that must match up to the end of a path.
  #
  # Literal characters are escaped, so "." only matches a dot and keys that
  # contain regex metacharacters ("+", "(", ...) are compared verbatim. The
  # result is anchored with \A / \z rather than ^ / $ so a newline inside a key
  # cannot satisfy the anchors early.
  #
  # @param pattern [String]
  # @return [Regexp]
  def self.convert_to_re(pattern)
    rooted = pattern.start_with?('$')
    remainder = rooted ? pattern[1..-1] : pattern

    body = remainder.gsub(TOKEN_RE) do |token|
      case token
      when '[*]' then '\[\d+\]'                # any array index
      when '..'  then '(?<=[\.\$\]]).*[\.\]]'  # after ".", "$" or "]": anything, ending in "." or "]"
      when '.*'  then '\.[^\.\[\]]+'           # exactly one key
      else Regexp.escape(token)
      end
    end

    Regexp.new("#{rooted ? '\A\$' : ''}#{body}\\z")
  end
end
|
22
|
+
|
@@ -0,0 +1,188 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'json/stream'
|
3
|
+
require_relative 'json_path'
|
4
|
+
|
5
|
+
# Streams JSON through JSON::Stream::Parser and hands the raw JSON text of
# every element whose path matches a registered JSONPath pattern to a block.
#
# While parsing, +key_path+ holds the path of the element currently being read
# ("$.docs[2].id"). After a value, object or array has been completely read,
# +key_path+ is the path of that element; the next key (or the next array
# element) replaces the last segment.
#
# Known limitations:
# * keys containing ".", "[" or "]" confuse the string based path tracking;
# * there is one buffer per pattern, so an element nested inside another
#   element matching the same pattern is not captured separately.
class JsonStreamTrigger
  attr_reader :key_path, :triggers, :full_buffer
  DEBUG = false

  # Trailing "[n]" (or "[]" for an array with no element read yet).
  ARRAY_INDEX = /\[(\d*)\]\z/
  # Trailing ".key" segment.
  LAST_KEY = /\.[^.\[\]]+\z/
  private_constant :ARRAY_INDEX, :LAST_KEY

  def initialize()
    @parser = JSON::Stream::Parser.new

    @parser.start_object(&method(:start_object))
    @parser.end_object(&method(:end_object))
    @parser.start_array(&method(:start_array))
    @parser.end_array(&method(:end_array))
    @parser.key(&method(:key))
    @parser.value(&method(:value))

    @last_call = nil
    @key_path = ''
    @triggers = {}
    @active_buffers = {}
    @full_buffer = ''
  end

  # Registers +block+ to be called with the JSON text (a String) of every
  # element whose path matches +pattern+. Registering the same pattern again
  # replaces the previous block.
  #
  # @param pattern [String] JSONPath-like pattern, e.g. "$.data[*]"
  # @return [Proc] the registered block
  def on(pattern, &block)
    @triggers[pattern] = block
  end

  # Feeds the next chunk of the document to the parser.
  #
  # @param bytes [String]
  # @return [JsonStreamTrigger] self, so calls can be chained
  def <<(bytes)
    debug { "bytes: #{bytes.inspect}" }
    @parser << bytes
    @full_buffer << bytes if DEBUG
    self
  end

  # @param pattern [String]
  # @return [Boolean] whether the current +key_path+ matches +pattern+
  def path_matches?(pattern)
    JsonPath.matches?(@key_path, pattern)
  end

  protected

  ################################ PARSING TRIGGERS ###########################

  def start_object
    debug { "start object" }
    @key_path << (@key_path.empty? ? "$" : '')
    increment_path_array { activate_buffers_for_matching }
    append_buffers ',{'
    @last_call = :start_object
  end

  def end_object
    debug { "end object" }
    # A non-empty object leaves the path on its last member; step back to the
    # object itself. For an empty object the path already is the object's.
    trim_segment(LAST_KEY) unless @last_call == :start_object
    # Close the object before firing so the block receives complete JSON and
    # fires on the object's own path.
    append_buffers '}'
    trigger_block_for_matching
    @last_call = :end_object
  end

  def start_array
    debug { "start array" }
    increment_path_array { activate_buffers_for_matching }
    append_buffers ',['
    @key_path << (@key_path.empty? ? "$[]" : "[]")
    activate_buffers_for_matching
    @last_call = :start_array
  end

  def end_array
    debug { "end array" }
    append_buffers ']'
    # Drop the index of the last element so the path names the array itself.
    # The array's own key stays in place: the next sibling key removes it.
    trim_segment(ARRAY_INDEX)
    trigger_block_for_matching
    @last_call = :end_array
  end

  def key(k)
    debug { "new key '#{k}'" }
    # Replace the previous sibling's key; the first key of an object has none.
    trim_segment(LAST_KEY) unless @last_call == :start_object
    @key_path << ".#{k}"
    # JSON.dump escapes quotes and backslashes so the buffer stays valid JSON.
    append_buffers ",#{JSON.dump(k)}:"
    activate_buffers_for_matching
    @last_call = :key
  end

  def value(v)
    debug { "value '#{v}'" }
    increment_path_array { activate_buffers_for_matching }
    append_buffers ",#{JSON.dump(v)}"
    trigger_block_for_matching
    @last_call = :value
  end

  ################################ BUFFER STUFF ###########################

  # Starts an empty buffer for every pattern that matches the current path and
  # is not being captured already. Called once the path of the element that is
  # about to be read is known.
  def activate_buffers_for_matching
    @triggers.each_key do |pattern|
      debug { "checking #{@key_path} matches #{pattern}" }
      next if @active_buffers.key?(pattern)
      next unless JsonPath.matches?(@key_path, pattern)

      debug { ">> Activating buffer for #{pattern.inspect}" }
      @active_buffers[pattern] = ''.dup
    end
  end

  # Fires the block of every active pattern matching the current path, passing
  # the captured JSON text. Called when a value, object or array is complete.
  #
  # The buffer is always released afterwards: leaving it active would make it
  # collect the enclosing "]" / "}" and everything after it. A wildcard pattern
  # gets a fresh buffer when the next matching element starts.
  def trigger_block_for_matching
    @active_buffers.keys.each do |pattern|
      next unless JsonPath.matches?(@key_path, pattern)

      debug { "<< Calling trigger for '#{pattern}'" }
      captured = @active_buffers.delete(pattern)
      @triggers[pattern].call captured
      debug { "Stopping buffer for '#{pattern}'" }
    end
  end

  # Appends +bytes+ to every active buffer. A leading comma is dropped for a
  # buffer that is empty or ends in "[", "{" or ":", because no separator is
  # needed there. The decision is taken per buffer.
  def append_buffers(bytes)
    @active_buffers.each do |pattern, buffer|
      chunk = bytes
      if bytes[0] == ',' && [nil, '[', '{', ':'].include?(buffer[-1, 1])
        chunk = bytes[1..-1]
      end
      buffer << chunk.to_s
      debug { "Appended to #{pattern} => '#{chunk}' to buffer '#{buffer}'" }
    end
  end

  ############################### PATH STUFF #######################

  # Removes the part of key_path matched by +re+ (normally its last segment).
  def trim_segment(re)
    @key_path.sub!(re, '')
    @key_path << '$' if @key_path == '' # add back the $ if we trimmed it off
    debug { "    trimmed off #{re}" }
  end

  # When the path ends in an array index, advances it to the next element
  # ("[]" -> "[1]", "[1]" -> "[2]"; indexes start at 1) and yields. Does
  # nothing when the current element is not directly inside an array.
  def increment_path_array
    advanced = @key_path.sub!(ARRAY_INDEX) { "[#{Regexp.last_match(1).to_i + 1}]" }
    return unless advanced

    debug { "incrementing path array: #{@key_path}" }
    yield if block_given?
  end

  # Prints +msg+ (or the block's result) padded to 60 columns, followed by the
  # current path. Only active when DEBUG is true; the block form avoids
  # building the message otherwise.
  def debug(msg = nil)
    return unless DEBUG

    msg = yield if msg.nil? && block_given?
    puts "#{msg.to_s.ljust(60)}PATH: #{@key_path}"
  end
end
|
metadata
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: json_stream_trigger
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.1'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Tyler Roberts
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-01-12 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: ''
|
14
|
+
email: code@polar-concepts.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- README.md
|
20
|
+
- lib/json_path.rb
|
21
|
+
- lib/json_stream_trigger.rb
|
22
|
+
homepage: http://github.com/bdevel/json_stream_trigger
|
23
|
+
licenses:
|
24
|
+
- MIT
|
25
|
+
metadata: {}
|
26
|
+
post_install_message:
|
27
|
+
rdoc_options: []
|
28
|
+
require_paths:
|
29
|
+
- lib
|
30
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - '>='
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
requirements: []
|
41
|
+
rubyforge_project:
|
42
|
+
rubygems_version: 2.2.2
|
43
|
+
signing_key:
|
44
|
+
specification_version: 4
|
45
|
+
summary: Parse large JSON files as a stream and trigger events upon key matching.
|
46
|
+
test_files: []
|
47
|
+
has_rdoc:
|