mortar 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/README.md +36 -0
  2. data/bin/mortar +13 -0
  3. data/lib/mortar.rb +23 -0
  4. data/lib/mortar/auth.rb +312 -0
  5. data/lib/mortar/cli.rb +54 -0
  6. data/lib/mortar/command.rb +267 -0
  7. data/lib/mortar/command/auth.rb +96 -0
  8. data/lib/mortar/command/base.rb +319 -0
  9. data/lib/mortar/command/clusters.rb +41 -0
  10. data/lib/mortar/command/describe.rb +97 -0
  11. data/lib/mortar/command/generate.rb +121 -0
  12. data/lib/mortar/command/help.rb +166 -0
  13. data/lib/mortar/command/illustrate.rb +97 -0
  14. data/lib/mortar/command/jobs.rb +174 -0
  15. data/lib/mortar/command/pigscripts.rb +45 -0
  16. data/lib/mortar/command/projects.rb +128 -0
  17. data/lib/mortar/command/validate.rb +94 -0
  18. data/lib/mortar/command/version.rb +42 -0
  19. data/lib/mortar/errors.rb +24 -0
  20. data/lib/mortar/generators/generator_base.rb +107 -0
  21. data/lib/mortar/generators/macro_generator.rb +37 -0
  22. data/lib/mortar/generators/pigscript_generator.rb +40 -0
  23. data/lib/mortar/generators/project_generator.rb +67 -0
  24. data/lib/mortar/generators/udf_generator.rb +28 -0
  25. data/lib/mortar/git.rb +233 -0
  26. data/lib/mortar/helpers.rb +488 -0
  27. data/lib/mortar/project.rb +156 -0
  28. data/lib/mortar/snapshot.rb +39 -0
  29. data/lib/mortar/templates/macro/macro.pig +14 -0
  30. data/lib/mortar/templates/pigscript/pigscript.pig +38 -0
  31. data/lib/mortar/templates/pigscript/python_udf.py +13 -0
  32. data/lib/mortar/templates/project/Gemfile +3 -0
  33. data/lib/mortar/templates/project/README.md +8 -0
  34. data/lib/mortar/templates/project/gitignore +4 -0
  35. data/lib/mortar/templates/project/macros/gitkeep +0 -0
  36. data/lib/mortar/templates/project/pigscripts/pigscript.pig +35 -0
  37. data/lib/mortar/templates/project/udfs/python/python_udf.py +13 -0
  38. data/lib/mortar/templates/udf/python_udf.py +13 -0
  39. data/lib/mortar/version.rb +20 -0
  40. data/lib/vendor/mortar/okjson.rb +598 -0
  41. data/lib/vendor/mortar/uuid.rb +312 -0
  42. data/spec/mortar/auth_spec.rb +156 -0
  43. data/spec/mortar/command/auth_spec.rb +46 -0
  44. data/spec/mortar/command/base_spec.rb +82 -0
  45. data/spec/mortar/command/clusters_spec.rb +61 -0
  46. data/spec/mortar/command/describe_spec.rb +135 -0
  47. data/spec/mortar/command/generate_spec.rb +139 -0
  48. data/spec/mortar/command/illustrate_spec.rb +140 -0
  49. data/spec/mortar/command/jobs_spec.rb +364 -0
  50. data/spec/mortar/command/pigscripts_spec.rb +70 -0
  51. data/spec/mortar/command/projects_spec.rb +165 -0
  52. data/spec/mortar/command/validate_spec.rb +119 -0
  53. data/spec/mortar/command_spec.rb +122 -0
  54. data/spec/mortar/git_spec.rb +278 -0
  55. data/spec/mortar/helpers_spec.rb +82 -0
  56. data/spec/mortar/project_spec.rb +76 -0
  57. data/spec/mortar/snapshot_spec.rb +46 -0
  58. data/spec/spec.opts +1 -0
  59. data/spec/spec_helper.rb +278 -0
  60. data/spec/support/display_message_matcher.rb +68 -0
  61. metadata +259 -0
@@ -0,0 +1,156 @@
1
+ #
2
+ # Copyright 2012 Mortar Data Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ require 'fileutils'
18
+
19
+ module Mortar
20
+ module Project
21
+ class ProjectError < RuntimeError; end
22
+
23
+ class Project
24
+ def self.required_directories
25
+ ["macros", "pigscripts", "udfs"]
26
+ end
27
+
28
+ attr_accessor :name
29
+ attr_accessor :remote
30
+ attr_accessor :root_path
31
+
32
+ def initialize(name, root_path, remote)
33
+ @name = name
34
+ @root_path = root_path
35
+ @remote = remote
36
+ end
37
+
38
+ def python_udfs_path
39
+ File.join(@root_path, "udfs/python")
40
+ end
41
+
42
+ def python_udfs
43
+ @python_udfs ||= PythonUDFs.new(
44
+ python_udfs_path,
45
+ "python",
46
+ ".py")
47
+ end
48
+
49
+ def pigscripts_path
50
+ File.join(@root_path, "pigscripts")
51
+ end
52
+
53
+ def pigscripts
54
+ @pigscripts ||= PigScripts.new(
55
+ pigscripts_path,
56
+ "pigscripts",
57
+ ".pig")
58
+ @pigscripts
59
+ end
60
+
61
+ def tmp_path
62
+ path = File.join(@root_path, "tmp")
63
+ unless File.directory? path
64
+ FileUtils.mkdir_p path
65
+ end
66
+ path
67
+ end
68
+ end
69
+
70
+ class ProjectEntity
71
+
72
+ include Enumerable
73
+
74
+ def initialize(path, name, filename_extension)
75
+ @path = path
76
+ @name = name
77
+ @filename_extension = filename_extension
78
+ @elements = elements
79
+ end
80
+
81
+ def method_missing(method, *args)
82
+ method_name = method.to_s
83
+ return @elements[method_name] if @elements[method_name]
84
+ super
85
+ end
86
+
87
+ def each
88
+ @elements.each {|key, value| yield [key, value]}
89
+ end
90
+
91
+ def [](key)
92
+ @elements[key]
93
+ end
94
+
95
+ def keys
96
+ @elements.keys
97
+ end
98
+
99
+ protected
100
+
101
+ def element_name(element_path)
102
+ File.basename(element_path, @filename_extension)
103
+ end
104
+
105
+ def elements
106
+ unless File.directory? @path
107
+ raise ProjectError, "Unable to find #{@name} directory in project"
108
+ end
109
+
110
+ # get {script_name => full_path}
111
+ file_paths = Dir[File.join(@path, "**", "*#{@filename_extension}")]
112
+ file_paths_hsh = file_paths.collect{|element_path| [element_name(element_path), element(element_name(element_path), element_path)]}.flatten
113
+ Hash[*file_paths_hsh]
114
+ end
115
+
116
+ def element(path)
117
+ raise NotImplementedError, "Implement in subclass"
118
+ end
119
+ end
120
+
121
+ class PigScripts < ProjectEntity
122
+ def element(name, path)
123
+ Script.new(name, path)
124
+ end
125
+ end
126
+
127
+ class PythonUDFs < ProjectEntity
128
+ def element(name, path)
129
+ Script.new(name, path)
130
+ end
131
+ end
132
+
133
+ class Script
134
+
135
+ attr_reader :name
136
+ attr_reader :path
137
+
138
+ def initialize(name, path)
139
+ @name = name
140
+ @path = path
141
+ end
142
+
143
+ def code
144
+ script_file = File.open(@path, "r")
145
+ script_contents = script_file.read
146
+ script_file.close
147
+ script_contents
148
+ end
149
+
150
+ def to_s
151
+ code
152
+ end
153
+ end
154
+
155
+ end
156
+ end
@@ -0,0 +1,39 @@
1
+ #
2
+ # Copyright 2012 Mortar Data Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ module Mortar
18
+ module Snapshot
19
+
20
+ extend self
21
+
22
+ def create_and_push_snapshot_branch(git, project)
23
+ # create / push a snapshot branch
24
+ snapshot_branch = action("Taking code snapshot") do
25
+ git.create_snapshot_branch()
26
+ end
27
+
28
+ git_ref = action("Sending code snapshot to Mortar") do
29
+ # push the code
30
+ git.push(project.remote, snapshot_branch)
31
+
32
+ # grab the commit hash and clean out the branch from the local branches
33
+ ref = git.git_ref(snapshot_branch)
34
+ git.branch_delete(snapshot_branch)
35
+ ref
36
+ end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,14 @@
1
+ /**
2
+ * <%= macro_name %>: Pig macros for use in pigscripts.
3
+ *
4
+ */
5
+
6
+ /**
7
+ * A simple example macro function that returns the entity passed in.
8
+ */
9
+ DEFINE <%= macro_name.capitalize %>_EXAMPLE(input_relation)
10
+ returns output_relation {
11
+ -- just an example
12
+ $output_relation = FOREACH $input_relation
13
+ GENERATE *;
14
+ };
@@ -0,0 +1,38 @@
1
+ /**
2
+ * <%= script_name %>
3
+ *
4
+ * Required parameters:
5
+ *
6
+ * -param INPUT_PATH Input path for script data (e.g. s3n://hawk-example-data/tutorial/excite.log.bz2)
7
+ * -param OUTPUT_PATH Output path for script data (e.g. s3n://my-output-bucket/<%= script_name %>)
8
+ */
9
+
10
+ <% if not options[:skip_udf] %>
11
+ /**
12
+ * User-Defined Functions (UDFs)
13
+ */
14
+
15
+ REGISTER '../udfs/python/<%= script_name %>.py' using streaming_python as <%= script_name %>;
16
+ <% end %>
17
+
18
+ -- This is an example of loading up input data
19
+ my_input_data = LOAD '$INPUT_PATH'
20
+ USING PigStorage('\t')
21
+ AS (field0:chararray, field1:chararray, field2:chararray);
22
+
23
+ -- This is an example pig operation
24
+ filtered = FILTER my_input_data
25
+ BY field0 IS NOT NULL;
26
+
27
+ -- This is an example call to a python user-defined function
28
+ with_udf_output = FOREACH filtered
29
+ GENERATE field0..field2,
30
+ <%= script_name %>.example_udf(field0) AS example_udf_field;
31
+
32
+ -- remove any existing data
33
+ rmf $OUTPUT_PATH;
34
+
35
+ -- store the results
36
+ STORE with_udf_output
37
+ INTO '$OUTPUT_PATH'
38
+ USING PigStorage('\t');
@@ -0,0 +1,13 @@
1
+ from pig_util import outputSchema
2
+
3
+ #
4
+ # This is where we write python UDFs (User-Defined Functions) that we can call from pig.
5
+ # Pig needs to know the schema of the data coming out of the function,
6
+ # which we specify using the @outputSchema decorator.
7
+ #
8
+ @outputSchema('example_udf:int')
9
+ def example_udf(input_str):
10
+ """
11
+ A simple example function that just returns the length of the string passed in.
12
+ """
13
+ return len(input_str) if input_str else None
@@ -0,0 +1,3 @@
1
+ source "http://rubygems.org"
2
+
3
+ gem "mortar"
@@ -0,0 +1,8 @@
1
+ # Weclome to Mortar!
2
+
3
+ Copy TK
4
+
5
+
6
+ # Getting Started
7
+
8
+ Copy TK
@@ -0,0 +1,4 @@
1
+ *.pyc
2
+ *.class
3
+ *.log
4
+ tmp
File without changes
@@ -0,0 +1,35 @@
1
+ /**
2
+ * <%= project_name %>
3
+ *
4
+ * Required parameters:
5
+ *
6
+ * -param INPUT_PATH Input path for script data (e.g. s3n://hawk-example-data/tutorial/excite.log.bz2)
7
+ * -param OUTPUT_PATH Output path for script data (e.g. s3n://my-output-bucket/<%= project_name %>)
8
+ */
9
+
10
+ /**
11
+ * User-Defined Functions (UDFs)
12
+ */
13
+ REGISTER '../udfs/python/<%= project_name %>.py' using streaming_python as <%= project_name %>;
14
+
15
+ -- This is an example of loading up input data
16
+ my_input_data = LOAD '$INPUT_PATH'
17
+ USING PigStorage('\t')
18
+ AS (field0:chararray, field1:chararray, field2:chararray);
19
+
20
+ -- This is an example pig operation
21
+ filtered = FILTER my_input_data
22
+ BY field0 IS NOT NULL;
23
+
24
+ -- This is an example call to a python user-defined function
25
+ with_udf_output = FOREACH filtered
26
+ GENERATE field0..field2,
27
+ <%= project_name %>.example_udf(field0) AS example_udf_field;
28
+
29
+ -- remove any existing data
30
+ rmf $OUTPUT_PATH;
31
+
32
+ -- store the results
33
+ STORE with_udf_output
34
+ INTO '$OUTPUT_PATH'
35
+ USING PigStorage('\t');
@@ -0,0 +1,13 @@
1
+ from pig_util import outputSchema
2
+
3
+ #
4
+ # This is where we write python UDFs (User-Defined Functions) that we can call from pig.
5
+ # Pig needs to know the schema of the data coming out of the function,
6
+ # which we specify using the @outputSchema decorator.
7
+ #
8
+ @outputSchema('example_udf:int')
9
+ def example_udf(input_str):
10
+ """
11
+ A simple example function that just returns the length of the string passed in.
12
+ """
13
+ return len(input_str) if input_str else None
@@ -0,0 +1,13 @@
1
+ from pig_util import outputSchema
2
+
3
+ #
4
+ # This is where we write python UDFs (User-Defined Functions) that we can call from pig.
5
+ # Pig needs to know the schema of the data coming out of the function,
6
+ # which we specify using the @outputSchema decorator.
7
+ #
8
+ @outputSchema('example_udf:int')
9
+ def example_udf(input_str):
10
+ """
11
+ A simple example function that just returns the length of the string passed in.
12
+ """
13
+ return len(input_str) if input_str else None
@@ -0,0 +1,20 @@
1
+ #
2
+ # Copyright 2012 Mortar Data Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+
17
+ module Mortar
18
+ # see http://semver.org/
19
+ VERSION = "0.1.0"
20
+ end
@@ -0,0 +1,598 @@
1
+ # encoding: UTF-8
2
+ #
3
+ # Copyright 2011, 2012 Keith Rarick
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+
23
+ # See https://github.com/kr/okjson for updates.
24
+
25
+ require 'stringio'
26
+
27
+ # Some parts adapted from
28
+ # http://golang.org/src/pkg/json/decode.go and
29
+ # http://golang.org/src/pkg/utf8/utf8.go
30
+ module Mortar
31
+ module OkJson
32
+ extend self
33
+
34
+
35
+ # Decodes a json document in string s and
36
+ # returns the corresponding ruby value.
37
+ # String s must be valid UTF-8. If you have
38
+ # a string in some other encoding, convert
39
+ # it first.
40
+ #
41
+ # String values in the resulting structure
42
+ # will be UTF-8.
43
+ def decode(s)
44
+ ts = lex(s)
45
+ v, ts = textparse(ts)
46
+ if ts.length > 0
47
+ raise Error, 'trailing garbage'
48
+ end
49
+ v
50
+ end
51
+
52
+
53
+ # Parses a "json text" in the sense of RFC 4627.
54
+ # Returns the parsed value and any trailing tokens.
55
+ # Note: this is almost the same as valparse,
56
+ # except that it does not accept atomic values.
57
+ def textparse(ts)
58
+ if ts.length < 0
59
+ raise Error, 'empty'
60
+ end
61
+
62
+ typ, _, val = ts[0]
63
+ case typ
64
+ when '{' then objparse(ts)
65
+ when '[' then arrparse(ts)
66
+ else
67
+ raise Error, "unexpected #{val.inspect}"
68
+ end
69
+ end
70
+
71
+
72
+ # Parses a "value" in the sense of RFC 4627.
73
+ # Returns the parsed value and any trailing tokens.
74
+ def valparse(ts)
75
+ if ts.length < 0
76
+ raise Error, 'empty'
77
+ end
78
+
79
+ typ, _, val = ts[0]
80
+ case typ
81
+ when '{' then objparse(ts)
82
+ when '[' then arrparse(ts)
83
+ when :val,:str then [val, ts[1..-1]]
84
+ else
85
+ raise Error, "unexpected #{val.inspect}"
86
+ end
87
+ end
88
+
89
+
90
+ # Parses an "object" in the sense of RFC 4627.
91
+ # Returns the parsed value and any trailing tokens.
92
+ def objparse(ts)
93
+ ts = eat('{', ts)
94
+ obj = {}
95
+
96
+ if ts[0][0] == '}'
97
+ return obj, ts[1..-1]
98
+ end
99
+
100
+ k, v, ts = pairparse(ts)
101
+ obj[k] = v
102
+
103
+ if ts[0][0] == '}'
104
+ return obj, ts[1..-1]
105
+ end
106
+
107
+ loop do
108
+ ts = eat(',', ts)
109
+
110
+ k, v, ts = pairparse(ts)
111
+ obj[k] = v
112
+
113
+ if ts[0][0] == '}'
114
+ return obj, ts[1..-1]
115
+ end
116
+ end
117
+ end
118
+
119
+
120
+ # Parses a "member" in the sense of RFC 4627.
121
+ # Returns the parsed values and any trailing tokens.
122
+ def pairparse(ts)
123
+ (typ, _, k), ts = ts[0], ts[1..-1]
124
+ if typ != :str
125
+ raise Error, "unexpected #{k.inspect}"
126
+ end
127
+ ts = eat(':', ts)
128
+ v, ts = valparse(ts)
129
+ [k, v, ts]
130
+ end
131
+
132
+
133
+ # Parses an "array" in the sense of RFC 4627.
134
+ # Returns the parsed value and any trailing tokens.
135
+ def arrparse(ts)
136
+ ts = eat('[', ts)
137
+ arr = []
138
+
139
+ if ts[0][0] == ']'
140
+ return arr, ts[1..-1]
141
+ end
142
+
143
+ v, ts = valparse(ts)
144
+ arr << v
145
+
146
+ if ts[0][0] == ']'
147
+ return arr, ts[1..-1]
148
+ end
149
+
150
+ loop do
151
+ ts = eat(',', ts)
152
+
153
+ v, ts = valparse(ts)
154
+ arr << v
155
+
156
+ if ts[0][0] == ']'
157
+ return arr, ts[1..-1]
158
+ end
159
+ end
160
+ end
161
+
162
+
163
+ def eat(typ, ts)
164
+ if ts[0][0] != typ
165
+ raise Error, "expected #{typ} (got #{ts[0].inspect})"
166
+ end
167
+ ts[1..-1]
168
+ end
169
+
170
+
171
+ # Scans s and returns a list of json tokens,
172
+ # excluding white space (as defined in RFC 4627).
173
+ def lex(s)
174
+ ts = []
175
+ while s.length > 0
176
+ typ, lexeme, val = tok(s)
177
+ if typ == nil
178
+ raise Error, "invalid character at #{s[0,10].inspect}"
179
+ end
180
+ if typ != :space
181
+ ts << [typ, lexeme, val]
182
+ end
183
+ s = s[lexeme.length..-1]
184
+ end
185
+ ts
186
+ end
187
+
188
+
189
+ # Scans the first token in s and
190
+ # returns a 3-element list, or nil
191
+ # if s does not begin with a valid token.
192
+ #
193
+ # The first list element is one of
194
+ # '{', '}', ':', ',', '[', ']',
195
+ # :val, :str, and :space.
196
+ #
197
+ # The second element is the lexeme.
198
+ #
199
+ # The third element is the value of the
200
+ # token for :val and :str, otherwise
201
+ # it is the lexeme.
202
+ def tok(s)
203
+ case s[0]
204
+ when ?{ then ['{', s[0,1], s[0,1]]
205
+ when ?} then ['}', s[0,1], s[0,1]]
206
+ when ?: then [':', s[0,1], s[0,1]]
207
+ when ?, then [',', s[0,1], s[0,1]]
208
+ when ?[ then ['[', s[0,1], s[0,1]]
209
+ when ?] then [']', s[0,1], s[0,1]]
210
+ when ?n then nulltok(s)
211
+ when ?t then truetok(s)
212
+ when ?f then falsetok(s)
213
+ when ?" then strtok(s)
214
+ when Spc then [:space, s[0,1], s[0,1]]
215
+ when ?\t then [:space, s[0,1], s[0,1]]
216
+ when ?\n then [:space, s[0,1], s[0,1]]
217
+ when ?\r then [:space, s[0,1], s[0,1]]
218
+ else numtok(s)
219
+ end
220
+ end
221
+
222
+
223
+ def nulltok(s); s[0,4] == 'null' ? [:val, 'null', nil] : [] end
224
+ def truetok(s); s[0,4] == 'true' ? [:val, 'true', true] : [] end
225
+ def falsetok(s); s[0,5] == 'false' ? [:val, 'false', false] : [] end
226
+
227
+
228
+ def numtok(s)
229
+ m = /-?([1-9][0-9]+|[0-9])([.][0-9]+)?([eE][+-]?[0-9]+)?/.match(s)
230
+ if m && m.begin(0) == 0
231
+ if m[3] && !m[2]
232
+ [:val, m[0], Integer(m[1])*(10**Integer(m[3][1..-1]))]
233
+ elsif m[2]
234
+ [:val, m[0], Float(m[0])]
235
+ else
236
+ [:val, m[0], Integer(m[0])]
237
+ end
238
+ else
239
+ []
240
+ end
241
+ end
242
+
243
+
244
+ def strtok(s)
245
+ m = /"([^"\\]|\\["\/\\bfnrt]|\\u[0-9a-fA-F]{4})*"/.match(s)
246
+ if ! m
247
+ raise Error, "invalid string literal at #{abbrev(s)}"
248
+ end
249
+ [:str, m[0], unquote(m[0])]
250
+ end
251
+
252
+
253
+ def abbrev(s)
254
+ t = s[0,10]
255
+ p = t['`']
256
+ t = t[0,p] if p
257
+ t = t + '...' if t.length < s.length
258
+ '`' + t + '`'
259
+ end
260
+
261
+
262
+ # Converts a quoted json string literal q into a UTF-8-encoded string.
263
+ # The rules are different than for Ruby, so we cannot use eval.
264
+ # Unquote will raise an error if q contains control characters.
265
+ def unquote(q)
266
+ q = q[1...-1]
267
+ a = q.dup # allocate a big enough string
268
+ rubydoesenc = false
269
+ # In ruby >= 1.9, a[w] is a codepoint, not a byte.
270
+ if a.class.method_defined?(:force_encoding)
271
+ a.force_encoding('UTF-8')
272
+ rubydoesenc = true
273
+ end
274
+ r, w = 0, 0
275
+ while r < q.length
276
+ c = q[r]
277
+ case true
278
+ when c == ?\\
279
+ r += 1
280
+ if r >= q.length
281
+ raise Error, "string literal ends with a \"\\\": \"#{q}\""
282
+ end
283
+
284
+ case q[r]
285
+ when ?",?\\,?/,?'
286
+ a[w] = q[r]
287
+ r += 1
288
+ w += 1
289
+ when ?b,?f,?n,?r,?t
290
+ a[w] = Unesc[q[r]]
291
+ r += 1
292
+ w += 1
293
+ when ?u
294
+ r += 1
295
+ uchar = begin
296
+ hexdec4(q[r,4])
297
+ rescue RuntimeError => e
298
+ raise Error, "invalid escape sequence \\u#{q[r,4]}: #{e}"
299
+ end
300
+ r += 4
301
+ if surrogate? uchar
302
+ if q.length >= r+6
303
+ uchar1 = hexdec4(q[r+2,4])
304
+ uchar = subst(uchar, uchar1)
305
+ if uchar != Ucharerr
306
+ # A valid pair; consume.
307
+ r += 6
308
+ end
309
+ end
310
+ end
311
+ if rubydoesenc
312
+ a[w] = '' << uchar
313
+ w += 1
314
+ else
315
+ w += ucharenc(a, w, uchar)
316
+ end
317
+ else
318
+ raise Error, "invalid escape char #{q[r]} in \"#{q}\""
319
+ end
320
+ when c == ?", c < Spc
321
+ raise Error, "invalid character in string literal \"#{q}\""
322
+ else
323
+ # Copy anything else byte-for-byte.
324
+ # Valid UTF-8 will remain valid UTF-8.
325
+ # Invalid UTF-8 will remain invalid UTF-8.
326
+ # In ruby >= 1.9, c is a codepoint, not a byte,
327
+ # in which case this is still what we want.
328
+ a[w] = c
329
+ r += 1
330
+ w += 1
331
+ end
332
+ end
333
+ a[0,w]
334
+ end
335
+
336
+
337
+ # Encodes unicode character u as UTF-8
338
+ # bytes in string a at position i.
339
+ # Returns the number of bytes written.
340
+ def ucharenc(a, i, u)
341
+ case true
342
+ when u <= Uchar1max
343
+ a[i] = (u & 0xff).chr
344
+ 1
345
+ when u <= Uchar2max
346
+ a[i+0] = (Utag2 | ((u>>6)&0xff)).chr
347
+ a[i+1] = (Utagx | (u&Umaskx)).chr
348
+ 2
349
+ when u <= Uchar3max
350
+ a[i+0] = (Utag3 | ((u>>12)&0xff)).chr
351
+ a[i+1] = (Utagx | ((u>>6)&Umaskx)).chr
352
+ a[i+2] = (Utagx | (u&Umaskx)).chr
353
+ 3
354
+ else
355
+ a[i+0] = (Utag4 | ((u>>18)&0xff)).chr
356
+ a[i+1] = (Utagx | ((u>>12)&Umaskx)).chr
357
+ a[i+2] = (Utagx | ((u>>6)&Umaskx)).chr
358
+ a[i+3] = (Utagx | (u&Umaskx)).chr
359
+ 4
360
+ end
361
+ end
362
+
363
+
364
+ def hexdec4(s)
365
+ if s.length != 4
366
+ raise Error, 'short'
367
+ end
368
+ (nibble(s[0])<<12) | (nibble(s[1])<<8) | (nibble(s[2])<<4) | nibble(s[3])
369
+ end
370
+
371
+
372
+ def subst(u1, u2)
373
+ if Usurr1 <= u1 && u1 < Usurr2 && Usurr2 <= u2 && u2 < Usurr3
374
+ return ((u1-Usurr1)<<10) | (u2-Usurr2) + Usurrself
375
+ end
376
+ return Ucharerr
377
+ end
378
+
379
+
380
+ def surrogate?(u)
381
+ Usurr1 <= u && u < Usurr3
382
+ end
383
+
384
+
385
+ def nibble(c)
386
+ case true
387
+ when ?0 <= c && c <= ?9 then c.ord - ?0.ord
388
+ when ?a <= c && c <= ?z then c.ord - ?a.ord + 10
389
+ when ?A <= c && c <= ?Z then c.ord - ?A.ord + 10
390
+ else
391
+ raise Error, "invalid hex code #{c}"
392
+ end
393
+ end
394
+
395
+
396
+ # Encodes x into a json text. It may contain only
397
+ # Array, Hash, String, Numeric, true, false, nil.
398
+ # (Note, this list excludes Symbol.)
399
+ # X itself must be an Array or a Hash.
400
+ # No other value can be encoded, and an error will
401
+ # be raised if x contains any other value, such as
402
+ # Nan, Infinity, Symbol, and Proc, or if a Hash key
403
+ # is not a String.
404
+ # Strings contained in x must be valid UTF-8.
405
+ def encode(x)
406
+ case x
407
+ when Hash then objenc(x)
408
+ when Array then arrenc(x)
409
+ else
410
+ raise Error, 'root value must be an Array or a Hash'
411
+ end
412
+ end
413
+
414
+
415
+ def valenc(x)
416
+ case x
417
+ when Hash then objenc(x)
418
+ when Array then arrenc(x)
419
+ when String then strenc(x)
420
+ when Numeric then numenc(x)
421
+ when true then "true"
422
+ when false then "false"
423
+ when nil then "null"
424
+ else
425
+ raise Error, "cannot encode #{x.class}: #{x.inspect}"
426
+ end
427
+ end
428
+
429
+
430
+ def objenc(x)
431
+ '{' + x.map{|k,v| keyenc(k) + ':' + valenc(v)}.join(',') + '}'
432
+ end
433
+
434
+
435
+ def arrenc(a)
436
+ '[' + a.map{|x| valenc(x)}.join(',') + ']'
437
+ end
438
+
439
+
440
+ def keyenc(k)
441
+ case k
442
+ when String then strenc(k)
443
+ else
444
+ raise Error, "Hash key is not a string: #{k.inspect}"
445
+ end
446
+ end
447
+
448
+
449
+ def strenc(s)
450
+ t = StringIO.new
451
+ t.putc(?")
452
+ r = 0
453
+
454
+ # In ruby >= 1.9, s[r] is a codepoint, not a byte.
455
+ rubydoesenc = s.class.method_defined?(:encoding)
456
+
457
+ while r < s.length
458
+ case s[r]
459
+ when ?" then t.print('\\"')
460
+ when ?\\ then t.print('\\\\')
461
+ when ?\b then t.print('\\b')
462
+ when ?\f then t.print('\\f')
463
+ when ?\n then t.print('\\n')
464
+ when ?\r then t.print('\\r')
465
+ when ?\t then t.print('\\t')
466
+ else
467
+ c = s[r]
468
+ case true
469
+ when rubydoesenc
470
+ begin
471
+ c.ord # will raise an error if c is invalid UTF-8
472
+ t.write(c)
473
+ rescue
474
+ t.write(Ustrerr)
475
+ end
476
+ when Spc <= c && c <= ?~
477
+ t.putc(c)
478
+ else
479
+ n = ucharcopy(t, s, r) # ensure valid UTF-8 output
480
+ r += n - 1 # r is incremented below
481
+ end
482
+ end
483
+ r += 1
484
+ end
485
+ t.putc(?")
486
+ t.string
487
+ end
488
+
489
+
490
+ def numenc(x)
491
+ if ((x.nan? || x.infinite?) rescue false)
492
+ raise Error, "Numeric cannot be represented: #{x}"
493
+ end
494
+ "#{x}"
495
+ end
496
+
497
+
498
+ # Copies the valid UTF-8 bytes of a single character
499
+ # from string s at position i to I/O object t, and
500
+ # returns the number of bytes copied.
501
+ # If no valid UTF-8 char exists at position i,
502
+ # ucharcopy writes Ustrerr and returns 1.
503
+ def ucharcopy(t, s, i)
504
+ n = s.length - i
505
+ raise Utf8Error if n < 1
506
+
507
+ c0 = s[i].ord
508
+
509
+ # 1-byte, 7-bit sequence?
510
+ if c0 < Utagx
511
+ t.putc(c0)
512
+ return 1
513
+ end
514
+
515
+ raise Utf8Error if c0 < Utag2 # unexpected continuation byte?
516
+
517
+ raise Utf8Error if n < 2 # need continuation byte
518
+ c1 = s[i+1].ord
519
+ raise Utf8Error if c1 < Utagx || Utag2 <= c1
520
+
521
+ # 2-byte, 11-bit sequence?
522
+ if c0 < Utag3
523
+ raise Utf8Error if ((c0&Umask2)<<6 | (c1&Umaskx)) <= Uchar1max
524
+ t.putc(c0)
525
+ t.putc(c1)
526
+ return 2
527
+ end
528
+
529
+ # need second continuation byte
530
+ raise Utf8Error if n < 3
531
+
532
+ c2 = s[i+2].ord
533
+ raise Utf8Error if c2 < Utagx || Utag2 <= c2
534
+
535
+ # 3-byte, 16-bit sequence?
536
+ if c0 < Utag4
537
+ u = (c0&Umask3)<<12 | (c1&Umaskx)<<6 | (c2&Umaskx)
538
+ raise Utf8Error if u <= Uchar2max
539
+ t.putc(c0)
540
+ t.putc(c1)
541
+ t.putc(c2)
542
+ return 3
543
+ end
544
+
545
+ # need third continuation byte
546
+ raise Utf8Error if n < 4
547
+ c3 = s[i+3].ord
548
+ raise Utf8Error if c3 < Utagx || Utag2 <= c3
549
+
550
+ # 4-byte, 21-bit sequence?
551
+ if c0 < Utag5
552
+ u = (c0&Umask4)<<18 | (c1&Umaskx)<<12 | (c2&Umaskx)<<6 | (c3&Umaskx)
553
+ raise Utf8Error if u <= Uchar3max
554
+ t.putc(c0)
555
+ t.putc(c1)
556
+ t.putc(c2)
557
+ t.putc(c3)
558
+ return 4
559
+ end
560
+
561
+ raise Utf8Error
562
+ rescue Utf8Error
563
+ t.write(Ustrerr)
564
+ return 1
565
+ end
566
+
567
+
568
+ class Utf8Error < ::StandardError
569
+ end
570
+
571
+
572
+ class Error < ::StandardError
573
+ end
574
+
575
+
576
+ Utagx = 0x80 # 1000 0000
577
+ Utag2 = 0xc0 # 1100 0000
578
+ Utag3 = 0xe0 # 1110 0000
579
+ Utag4 = 0xf0 # 1111 0000
580
+ Utag5 = 0xF8 # 1111 1000
581
+ Umaskx = 0x3f # 0011 1111
582
+ Umask2 = 0x1f # 0001 1111
583
+ Umask3 = 0x0f # 0000 1111
584
+ Umask4 = 0x07 # 0000 0111
585
+ Uchar1max = (1<<7) - 1
586
+ Uchar2max = (1<<11) - 1
587
+ Uchar3max = (1<<16) - 1
588
+ Ucharerr = 0xFFFD # unicode "replacement char"
589
+ Ustrerr = "\xef\xbf\xbd" # unicode "replacement char"
590
+ Usurrself = 0x10000
591
+ Usurr1 = 0xd800
592
+ Usurr2 = 0xdc00
593
+ Usurr3 = 0xe000
594
+
595
+ Spc = ' '[0]
596
+ Unesc = {?b=>?\b, ?f=>?\f, ?n=>?\n, ?r=>?\r, ?t=>?\t}
597
+ end
598
+ end