archieml 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -2
- data/lib/archieml/loader.rb +70 -37
- data/lib/archieml/version.rb +1 -1
- data/spec/lib/archieml/loader_spec.rb +2 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: db51dfa0cf2524c0161ff7106ef2ab0d6dc38193
|
4
|
+
data.tar.gz: 6caa7995d8f4ed9e281c3c4e48f323b0eacaf69a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e83fe41f68351de40718cca29cbc4651be79287961f5dd5a299f23d1d777f01f006af80bc5faf07211f55a987ed75dc17832ff7a6cf5e98f99d6aa1b5befe377
|
7
|
+
data.tar.gz: 87fa405bd6107a62922da75d67a5eb4c7e4029c0064e94ff8d9ae40ec3f260b23725397ba5e8f48826ab23c69ee00f851cd186eea495823525bb15fb615b0847
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@ Parse Archie Markup Language (ArchieML) documents into Ruby Hashes.
|
|
4
4
|
|
5
5
|
Read about the ArchieML specification at [archieml.org](http://archieml.org).
|
6
6
|
|
7
|
-
The current version is `v0.2.0`.
|
7
|
+
The current version is `v0.3.0`.
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
@@ -58,7 +58,7 @@ client.authorization = flow.authorize
|
|
58
58
|
|
59
59
|
Log into your Google account and authorize the application to access your Google Drive files.
|
60
60
|
|
61
|
-
Now that you have an authenticated `client`, you can make an API call to a document saved in Drive. Create a document with some basic AML inside (such as "key: value"), save it, and note the long string of characters at the end of the URL:
|
61
|
+
Now that you have an authenticated `client`, you can make an API call to a document saved in Drive. Create a document with some basic AML inside (such as "key: value"), save it, and note the long string of characters at the end of the URL:
|
62
62
|
|
63
63
|
`https://docs.google.com/a/nytimes.com/document/d/[FILE_ID]/edit`
|
64
64
|
|
@@ -169,10 +169,13 @@ aml = Archieml.load(html_aml)
|
|
169
169
|
|
170
170
|
## Tests
|
171
171
|
|
172
|
+
Test examples are stored in a submodule. You may need to run `git submodule update --init` to fetch them.
|
173
|
+
|
172
174
|
There is a full test suite using rspec. `bundle install`, and then `rspec` to execute them.
|
173
175
|
|
174
176
|
## Changelog
|
175
177
|
|
178
|
+
* `0.3.0` - Freeform arrays type and unicode.
|
176
179
|
* `0.2.0` - Updated to support an updated ArchieML spec: [2015-05-09](http://archieml.org/spec/1.0/CR-20150509.html). Adds support for nested arrays.
|
177
180
|
* `0.1.1` - More consistent handling of newlines. Fixed bugs around detecting the scope of multi-line values.
|
178
181
|
* `0.1.0` - Initial release supporting the first version of the ArchieML spec, published [2015-03-06](http://archieml.org/spec/1.0/CR-20150306.html).
|
data/lib/archieml/loader.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
module Archieml
|
2
2
|
class Loader
|
3
3
|
|
4
|
-
|
5
|
-
|
4
|
+
WHITESPACE_PATTERN = "\u0000\u0009\u000A\u000B\u000C\u000D\u0020\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u2028\u2029\u202F\u205F\u3000\uFEFF"
|
5
|
+
SLUG_BLACKLIST = "#{WHITESPACE_PATTERN}\u005B\u005C\u005D\u007B\u007D\u003A"
|
6
|
+
|
7
|
+
START_KEY = /^\s*([^#{Regexp.escape(SLUG_BLACKLIST)}]+)[ \t\r]*:[ \t\r]*(.*(?:\n|\r|$))/
|
6
8
|
COMMAND_KEY = /^\s*:[ \t\r]*(endskip|ignore|skip|end)(.*(?:\n|\r|$))/i
|
7
9
|
ARRAY_ELEMENT = /^\s*\*[ \t\r]*(.*(?:\n|\r|$))/
|
8
|
-
SCOPE_PATTERN = /^\s*(\[|\{)[ \t\r]*([
|
10
|
+
SCOPE_PATTERN = /^\s*(\[|\{)[ \t\r]*([\+\.]*)[ \t\r]*([^#{Regexp.escape(SLUG_BLACKLIST)}]*)[ \t\r]*(?:\]|\}).*?(\n|\r|$)/
|
9
11
|
|
10
12
|
def initialize(options = {})
|
11
13
|
@data = @scope = {}
|
@@ -40,10 +42,10 @@ module Archieml
|
|
40
42
|
self.parse_array_element(match[1])
|
41
43
|
|
42
44
|
elsif !@is_skipping && match = line.match(SCOPE_PATTERN)
|
43
|
-
self.parse_scope(match[1], match[2])
|
45
|
+
self.parse_scope(match[1], match[2], match[3])
|
44
46
|
|
45
47
|
else
|
46
|
-
|
48
|
+
self.parse_text(line)
|
47
49
|
end
|
48
50
|
end
|
49
51
|
|
@@ -56,11 +58,12 @@ module Archieml
|
|
56
58
|
|
57
59
|
self.increment_array_element(key)
|
58
60
|
|
61
|
+
key = 'value' if (@stack_scope && @stack_scope[:flags].match(/\+/))
|
62
|
+
|
59
63
|
@buffer_key = key
|
60
64
|
@buffer_string = rest_of_line
|
61
65
|
|
62
66
|
self.flush_buffer_into(key, replace: true)
|
63
|
-
@buffer_key = key
|
64
67
|
end
|
65
68
|
|
66
69
|
def parse_array_element(value)
|
@@ -68,13 +71,10 @@ module Archieml
|
|
68
71
|
|
69
72
|
@stack_scope[:array_type] ||= :simple
|
70
73
|
|
71
|
-
# Ignore simple array elements inside complex arrays
|
72
|
-
return if @stack_scope[:array_type] == :complex
|
73
|
-
|
74
74
|
@stack_scope[:array] << ''
|
75
|
+
@buffer_key = @stack_scope[:array]
|
75
76
|
@buffer_string = value
|
76
77
|
self.flush_buffer_into(@stack_scope[:array], replace: true)
|
77
|
-
@buffer_key = @stack_scope[:array]
|
78
78
|
end
|
79
79
|
|
80
80
|
def parse_command_key(command)
|
@@ -100,47 +100,59 @@ module Archieml
|
|
100
100
|
self.flush_buffer!
|
101
101
|
end
|
102
102
|
|
103
|
-
def parse_scope(scope_type, scope_key)
|
103
|
+
def parse_scope(scope_type, flags, scope_key)
|
104
104
|
self.flush_buffer!
|
105
105
|
|
106
106
|
if scope_key == ''
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
@stack_scope = nil
|
111
|
-
@stack = []
|
112
|
-
when '['
|
113
|
-
# Move up a level
|
114
|
-
if last_stack_item = @stack.pop
|
115
|
-
@scope = last_stack_item[:scope] || @data
|
116
|
-
@stack_scope = @stack.last
|
117
|
-
end
|
118
|
-
end
|
107
|
+
last_stack_item = @stack.pop
|
108
|
+
@scope = (last_stack_item ? last_stack_item[:scope] : @data) || @data
|
109
|
+
@stack_scope = @stack.last
|
119
110
|
|
120
111
|
elsif %w([ {).include?(scope_type)
|
121
112
|
nesting = false
|
122
113
|
key_scope = @data
|
123
114
|
|
124
|
-
if
|
125
|
-
scope_key = scope_key[1..-1]
|
115
|
+
if flags.match(/^\./)
|
126
116
|
self.increment_array_element(scope_key)
|
127
117
|
nesting = true
|
128
118
|
key_scope = @scope if @stack_scope
|
119
|
+
else
|
120
|
+
@scope = @data
|
121
|
+
@stack = []
|
129
122
|
end
|
130
123
|
|
131
|
-
|
132
|
-
|
133
|
-
|
124
|
+
# Within freeforms, the `type` of nested objects and arrays is taken
|
125
|
+
# verbatim from the `keyScope`.
|
126
|
+
if @stack_scope && @stack_scope[:flags].match(/\+/)
|
127
|
+
parsed_scope_key = scope_key
|
128
|
+
|
129
|
+
# Outside of freeforms, dot-notation interpreted as nested data.
|
130
|
+
else
|
131
|
+
key_bits = scope_key.split('.')
|
132
|
+
key_bits[0...-1].each do |bit|
|
133
|
+
key_scope = key_scope[bit] ||= {}
|
134
|
+
end
|
135
|
+
parsed_scope_key = key_bits.last
|
134
136
|
end
|
135
137
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
scope
|
142
|
-
|
138
|
+
# Content of nested scopes within a freeform should be stored under "value."
|
139
|
+
if (@stack_scope && @stack_scope[:flags].match(/\+/) && flags.match(/\./))
|
140
|
+
if scope_type == '['
|
141
|
+
parsed_scope_key = 'value'
|
142
|
+
elsif scope_type == '{'
|
143
|
+
@scope = @scope[:value] = {}
|
144
|
+
end
|
145
|
+
end
|
143
146
|
|
147
|
+
stack_scope_item = {
|
148
|
+
array: nil,
|
149
|
+
array_type: nil,
|
150
|
+
array_first_key: nil,
|
151
|
+
flags: flags,
|
152
|
+
scope: @scope
|
153
|
+
}
|
154
|
+
if scope_type == '['
|
155
|
+
stack_scope_item[:array] = key_scope[parsed_scope_key] = []
|
144
156
|
if nesting
|
145
157
|
@stack << stack_scope_item
|
146
158
|
else
|
@@ -149,11 +161,25 @@ module Archieml
|
|
149
161
|
@stack_scope = @stack.last
|
150
162
|
|
151
163
|
elsif scope_type == '{'
|
152
|
-
|
164
|
+
if nesting
|
165
|
+
@stack << stack_scope_item
|
166
|
+
else
|
167
|
+
@scope = key_scope[parsed_scope_key] = key_scope[parsed_scope_key].is_a?(Hash) ? key_scope[parsed_scope_key] : {}
|
168
|
+
@stack = [stack_scope_item]
|
169
|
+
end
|
170
|
+
@stack_scope = @stack.last
|
153
171
|
end
|
154
172
|
end
|
155
173
|
end
|
156
174
|
|
175
|
+
def parse_text(text)
|
176
|
+
if @stack_scope && @stack_scope[:flags].match(/\+/) && text.match(/[^\n\r\s]/)
|
177
|
+
@stack_scope[:array] << { "type" => "text", "value" => text.gsub(/(^\s*)|(\s*$)/, '') }
|
178
|
+
else
|
179
|
+
@buffer_string += text
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
157
183
|
def increment_array_element(key)
|
158
184
|
# Special handling for arrays. If this is the start of the array, remember
|
159
185
|
# which key was encountered first. If this is a duplicate encounter of
|
@@ -168,7 +194,12 @@ module Archieml
|
|
168
194
|
if @stack_scope[:array_first_key] == nil || @stack_scope[:array_first_key] == key
|
169
195
|
@stack_scope[:array] << (@scope = {})
|
170
196
|
end
|
171
|
-
@stack_scope[:
|
197
|
+
if (@stack_scope[:flags].match(/\+/))
|
198
|
+
@scope[:type] = key
|
199
|
+
# key = 'content'
|
200
|
+
else
|
201
|
+
@stack_scope[:array_first_key] ||= key
|
202
|
+
end
|
172
203
|
end
|
173
204
|
end
|
174
205
|
|
@@ -180,11 +211,13 @@ module Archieml
|
|
180
211
|
end
|
181
212
|
|
182
213
|
def flush_buffer_into(key, options = {})
|
214
|
+
existing_buffer_key = @buffer_key
|
183
215
|
value = self.flush_buffer!
|
184
216
|
|
185
217
|
if options[:replace]
|
186
218
|
value = self.format_value(value, :replace).sub(/^\s*/, '')
|
187
219
|
@buffer_string = value.match(/\s*\Z/)[0]
|
220
|
+
@buffer_key = existing_buffer_key
|
188
221
|
else
|
189
222
|
value = self.format_value(value, :append)
|
190
223
|
end
|
data/lib/archieml/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: archieml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Strickland
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-06-19 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Parse Archie Markup Language documents
|
14
14
|
email:
|