json_extractor 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.txt +20 -0
- data/README.md +63 -0
- data/ext/json_extractor/extconf.rb +3 -0
- data/ext/json_extractor/json_extractor.c +215 -0
- data/lib/json_extractor.rb +9 -0
- metadata +105 -0
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2013 Brad Heller
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
# JSON Extractor
|
2
|
+
|
3
|
+
Pull out specific objects from a large JSON document by key without having to
|
4
|
+
deserialize the whole object into Ruby.
|
5
|
+
|
6
|
+
**NOTE:** This gem only supports extracting objects from JSON documents.
|
7
|
+
Strings, numbers, and arrays are not supported. Yet.
|
8
|
+
|
9
|
+
## Example
|
10
|
+
|
11
|
+
Assume that you have a JSON document:
|
12
|
+
|
13
|
+
``` json
|
14
|
+
{
|
15
|
+
"number": 6,
|
16
|
+
"name": "Darlington Nagbe",
|
17
|
+
"position": "midfielder",
|
18
|
+
"statistics": {
|
19
|
+
"2012": {
|
20
|
+
"gp": 33,
|
21
|
+
"gs": 31,
|
22
|
+
"g": 6,
|
23
|
+
"min": 2777,
|
24
|
+
"a": 1,
|
25
|
+
"sht": 50,
|
26
|
+
"sog": 16,
|
27
|
+
"fc": 16,
|
28
|
+
"off": 6,
|
29
|
+
"y": 0,
|
30
|
+
"r": 0
|
31
|
+
}
|
32
|
+
}
|
33
|
+
}
|
34
|
+
```
|
35
|
+
|
36
|
+
The JSON Extractor API will let you pull out a specific object within that
|
37
|
+
file.
|
38
|
+
|
39
|
+
``` bash
|
40
|
+
$ irb
|
41
|
+
1.9.3p374 :002 > require 'json_extractor'
|
42
|
+
=> false
|
43
|
+
1.9.3p374 :001 > JSONExtractor.subdocument("/path/to/data.json", "statistics")
|
44
|
+
=> {"2012"=>{"gp"=>33, "gs"=>31, "g"=>6, "min"=>2777, "a"=>1, "sht"=>50, "sog"=>16, "fc"=>16, "off"=>6, "y"=>0, "r"=>0}}
|
45
|
+
```
|
46
|
+
|
47
|
+
## How does it work?
|
48
|
+
|
49
|
+
The actual implementation is done in C. The whole file is read into memory, the
|
50
|
+
key is found somewhere within the file, and the object associated with that key
|
51
|
+
is extracted using a simple recursive descent parser that's only aware of the
|
52
|
+
semantics of an object.
|
53
|
+
|
54
|
+
## Contributing to JSON Extractor
|
55
|
+
|
56
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
57
|
+
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
|
58
|
+
* Fork the project.
|
59
|
+
* Start a feature/bugfix branch.
|
60
|
+
* Commit and push until you are happy with your contribution.
|
61
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
62
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
|
63
|
+
|
@@ -0,0 +1,215 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
|
3
|
+
// Needed for isspace.
|
4
|
+
#include <ctype.h>
|
5
|
+
|
6
|
+
// Declares a parser step named fn. Every step shares one signature: the
// string to search, its overall length, and the position to start from; it
// returns an unsigned int position.
#define PARSER_FUNC(fn) unsigned int fn(char *, long, unsigned int)
|
7
|
+
|
8
|
+
PARSER_FUNC(object);
|
9
|
+
PARSER_FUNC(object_body);
|
10
|
+
PARSER_FUNC(start);
|
11
|
+
PARSER_FUNC(whitespace);
|
12
|
+
|
13
|
+
/**
 * Reads the entire file named by filename into a freshly allocated,
 * NUL-terminated buffer.
 *
 * The buffer is one byte larger than the file so the contents can safely be
 * handed to C string functions such as strstr and strlen.
 *
 * @param [const char*] filename the path of the file to read
 * @return [char *] the file's contents, or NULL if filename is NULL or the
 *   file could not be opened, measured, allocated for, or read completely.
 *   Must be free'd by the caller.
 */
char * read_all(const char * filename) {
  long size;
  size_t actual;
  char *data = NULL;
  FILE *fp;

  if(filename == NULL) {
    return NULL;
  }

  fp = fopen(filename, "rb");

  if(!fp) {
    return NULL;
  }

  // Measure the file. A failure here (e.g. a non-seekable stream) means we
  // cannot size the buffer, so give up rather than guess.
  if(fseek(fp, 0L, SEEK_END) != 0) {
    goto done;
  }

  size = ftell(fp);

  if(size < 0 || fseek(fp, 0L, SEEK_SET) != 0) {
    goto done;
  }

  // Read the whole thing in to memory, leaving room for the terminator.
  data = malloc((size_t)size + 1);

  if(data == NULL) {
    goto done;
  }

  actual = fread(data, sizeof(char), (size_t)size, fp);

  if(actual != (size_t)size) {
    // Short read: the buffer would be incomplete, so report failure.
    free(data);
    data = NULL;
  } else {
    data[actual] = '\0';
  }

done:
  // Single exit point so the stream is closed on every path.
  fclose(fp);

  return data;
}
|
48
|
+
|
49
|
+
/**
 * Finds the end of the whitespace that is currently under pos within str.
 *
 * @param [char *] str the string to search.
 * @param [long] len the overall length of the string.
 * @param [unsigned int] pos the initial position (i.e. where to start)
 * @return [unsigned int] the index of the first non-whitespace character at
 *   or after pos, or (unsigned int)-1 if pos is outside the string or only
 *   whitespace remains.
 */
unsigned int whitespace(char * str, long len, unsigned int pos) {
  unsigned long end = len > 0 ? (unsigned long)len : 0UL;

  // Positions are unsigned ints and (unsigned int)-1 is the failure value,
  // so never scan far enough for pos to reach it (or wrap around).
  if(end > (unsigned int)-1) {
    end = (unsigned int)-1;
  }

  // Bounds are checked before str[pos] is read. The cast is required because
  // passing a negative char to isspace is undefined behaviour.
  while(pos < end && isspace((unsigned char)str[pos])) {
    pos++;
  }

  if(pos < end) {
    return pos;
  }

  return (unsigned int)-1;
}
|
68
|
+
|
69
|
+
/**
 * Scans forward from pos for the '}' that closes the object whose body
 * begins at pos.
 *
 * Nested objects are tracked with a depth counter instead of recursion, and
 * braces that appear inside double-quoted strings (including strings that
 * contain backslash escapes) are not treated as structure.
 *
 * @param [const char *] str the string to search.
 * @param [long] len the overall length of the string.
 * @param [unsigned int] pos the first position of the object body.
 * @return [unsigned int] the index of the closing brace, or (unsigned int)-1
 *   if the string ends before the object is closed.
 */
static unsigned int closing_brace(const char * str, long len, unsigned int pos) {
  unsigned long end = len > 0 ? (unsigned long)len : 0UL;
  unsigned int depth = 1;
  int in_string = 0;
  int escaped = 0;

  // (unsigned int)-1 is reserved as the failure value, so never let pos
  // reach it (this also keeps pos++ from wrapping).
  if(end > (unsigned int)-1) {
    end = (unsigned int)-1;
  }

  for(; pos < end; pos++) {
    char c = str[pos];

    if(in_string) {
      if(escaped) {
        escaped = 0;
      } else if(c == '\\') {
        escaped = 1;
      } else if(c == '"') {
        in_string = 0;
      }

      continue;
    }

    if(c == '"') {
      in_string = 1;
    } else if(c == '{') {
      depth++;
    } else if(c == '}') {
      depth--;

      if(depth == 0) {
        return pos;
      }
    }
  }

  // We ran out--the object was never closed.
  return (unsigned int)-1;
}

/**
 * Finds the end of the current object body that is under pos within str.
 *
 * @param [char *] str the string to search.
 * @param [long] len the overall length of the string.
 * @param [unsigned int] pos the initial position (i.e. where to start); the
 *   first character after the object's opening brace.
 * @return [unsigned int] the index just before the object's closing brace
 *   (so str[result + 1] == '}'), or (unsigned int)-1 if the object is never
 *   closed. For an empty body this is the index of the opening brace.
 */
unsigned int object_body(char * str, long len, unsigned int pos) {
  unsigned int close = closing_brace(str, len, pos);

  if(close == (unsigned int)-1) {
    return (unsigned int)-1;
  }

  // If close is 0 there is no position before it; the subtraction then
  // yields (unsigned int)-1, which is already the failure value.
  return close - 1;
}

/**
 * Finds the end of the current object that is under pos within str.
 *
 * @param [char *] str the string to search.
 * @param [long] len the overall length of the string.
 * @param [unsigned int] pos the initial position (i.e. where to start); must
 *   be the index of the object's opening brace.
 * @return [unsigned int] the index of the matching closing brace, or
 *   (unsigned int)-1 if pos is out of range, str[pos] is not '{', or the
 *   object is never closed.
 */
unsigned int object(char * str, long len, unsigned int pos) {
  unsigned long end = len > 0 ? (unsigned long)len : 0UL;

  // Check the bounds before looking at str[pos].
  if(pos >= end || str[pos] != '{') {
    return (unsigned int)-1;
  }

  return closing_brace(str, len, pos + 1);
}
|
117
|
+
|
118
|
+
/**
 * Starts parsing: skips any leading whitespace and, if an object begins
 * there, finds where that object ends.
 *
 * @param [char *] str the string to search.
 * @param [long] len the overall length of the string.
 * @param [unsigned int] pos the initial position (i.e. where to start)
 * @return [unsigned int] the end of the first-encountered object, or
 *   (unsigned int)-1 if only whitespace remains or the first
 *   non-whitespace character is not '{'.
 */
unsigned int start(char * str, long len, unsigned int pos) {
  pos = whitespace(str, len, pos);

  // whitespace() reports "nothing left" as (unsigned int)-1; that value must
  // be treated as a failure and never used to index str.
  if(pos == (unsigned int)-1) {
    return (unsigned int)-1;
  }

  if(str[pos] == '{') {
    return object(str, len, pos);
  }

  return (unsigned int)-1;
}
|
135
|
+
|
136
|
+
/**
 * Locates the end of the first object found at or after pos in a
 * NUL-terminated string.
 *
 * @param [char *] str the NUL-terminated string to search; may be NULL.
 * @param [unsigned int] pos the index at which to start looking.
 * @return [int] the index within str at which the object ends, or -1 if str
 *   is NULL, pos is not inside the string, no object starts there, or the
 *   index does not fit in an int.
 */
int find_subdocument(char * str, unsigned int pos) {
  size_t len;
  unsigned int end;

  if(str == NULL) {
    return -1;
  }

  // Measure once; the length is needed for both the range check and the parse.
  len = strlen(str);

  if(pos >= len) {
    return -1;
  }

  end = start(str, (long)len, pos);

  // Anything above half the unsigned range cannot be a non-negative int.
  // That includes the (unsigned int)-1 failure value, so map it to -1
  // explicitly instead of relying on an implementation-defined conversion.
  if(end > ((unsigned int)-1) / 2) {
    return -1;
  }

  return (int)end;
}
|
147
|
+
|
148
|
+
/**
 * Copies the object stored under key out of a JSON document.
 *
 * The document is searched for the first occurrence of the quoted key
 * immediately followed by a colon ("key":). Everything from just after the
 * colon up to and including the end of the object found there is duplicated.
 *
 * @param [char *] data the NUL-terminated document text; may be NULL.
 * @param [const char *] key the key to look for, without quotes; may be NULL.
 * @return [char *] a newly allocated, NUL-terminated copy of the object text,
 *   or NULL if either argument is NULL, the key is not present, no object
 *   follows it, or memory could not be allocated. Must be free'd by caller.
 */
char * extract_subdocument(char * data, const char * key) {
  char *full_key, *value, *final;
  size_t key_len, full_len;
  int end;

  if(data == NULL || key == NULL) {
    return NULL;
  }

  key_len = strlen(key);
  full_len = key_len + 3; // Two quotes and a colon around the key.

  full_key = malloc(full_len + 1);

  if(full_key == NULL) {
    return NULL;
  }

  // Build "key": by hand; the lengths are already known.
  full_key[0] = '"';
  memcpy(full_key + 1, key, key_len);
  full_key[key_len + 1] = '"';
  full_key[key_len + 2] = ':';
  full_key[full_len] = '\0';

  value = strstr(data, full_key);
  free(full_key);

  if(value == NULL) {
    return NULL;
  }

  // Step over the key so value points at whatever follows the colon.
  value += full_len;

  // Let's find the end of the document.
  end = find_subdocument(value, 0);

  if(end < 0) {
    return NULL;
  }

  // end is an index, so the text is end + 1 bytes long, plus the terminator.
  final = malloc((size_t)end + 2);

  if(final == NULL) {
    return NULL;
  }

  memcpy(final, value, (size_t)end + 1);
  final[(size_t)end + 1] = '\0';

  return final;
}
|
180
|
+
|
181
|
+
/**
 * Implements JSONExtractor.extract_subdocument(str, key): reads the file
 * named by str and returns the text of the object stored under key.
 *
 * @param [VALUE] self the receiver (unused).
 * @param [VALUE] str the path of the file to read, or nil.
 * @param [VALUE] key the key whose object should be extracted, or nil.
 * @return [VALUE] a new Ruby String holding the extracted text, or nil if
 *   either argument is nil, the file could not be read, or no object was
 *   found for the key.
 */
static VALUE rb_extract_subdocument(VALUE self, VALUE str, VALUE key) {
  const char *path, *name;
  char *data, *substr;
  VALUE result;

  // No str or no key?
  if(NIL_P(str) || NIL_P(key)) {
    return Qnil;
  }

  // StringValueCStr raises for arguments that are not Strings or that contain
  // NUL bytes, rather than reading a pointer out of an arbitrary object. Both
  // conversions happen before anything is allocated, so a raise cannot leak.
  path = StringValueCStr(str);
  name = StringValueCStr(key);

  data = read_all(path);

  if(data == NULL) {
    return Qnil;
  }

  substr = extract_subdocument(data, name);
  free(data);

  if(substr == NULL) {
    return Qnil;
  }

  // rb_str_new2 copies the bytes into the new String, so our own buffer can
  // be released as soon as the String exists.
  result = rb_str_new2(substr);
  free(substr);

  return result;
}
|
211
|
+
|
212
|
+
void Init_json_extractor(void) {
|
213
|
+
VALUE rb_mJSONExtractor = rb_define_module("JSONExtractor");
|
214
|
+
rb_define_singleton_method(rb_mJSONExtractor, "extract_subdocument", rb_extract_subdocument, 2);
|
215
|
+
}
|
metadata
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: json_extractor
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Brad Heller
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-06-30 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: jeweler
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 1.8.4
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 1.8.4
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rake-compiler
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
description: A set of C extensions that can extract specific keys from a JSON document.
|
63
|
+
Right now, only supports extracting objects.
|
64
|
+
email: brad@cloudability.com
|
65
|
+
executables: []
|
66
|
+
extensions:
|
67
|
+
- ext/json_extractor/extconf.rb
|
68
|
+
extra_rdoc_files:
|
69
|
+
- LICENSE.txt
|
70
|
+
- README.md
|
71
|
+
files:
|
72
|
+
- ext/json_extractor/extconf.rb
|
73
|
+
- ext/json_extractor/json_extractor.c
|
74
|
+
- lib/json_extractor.rb
|
75
|
+
- LICENSE.txt
|
76
|
+
- README.md
|
77
|
+
homepage: http://github.com/bradhe/json_extractor
|
78
|
+
licenses:
|
79
|
+
- MIT
|
80
|
+
post_install_message:
|
81
|
+
rdoc_options: []
|
82
|
+
require_paths:
|
83
|
+
- lib
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
none: false
|
86
|
+
requirements:
|
87
|
+
- - ! '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
segments:
|
91
|
+
- 0
|
92
|
+
hash: -2562154935140798148
|
93
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
requirements: []
|
100
|
+
rubyforge_project:
|
101
|
+
rubygems_version: 1.8.24
|
102
|
+
signing_key:
|
103
|
+
specification_version: 3
|
104
|
+
summary: Tools for extracting JSON without having to deserialize it.
|
105
|
+
test_files: []
|