archieml 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -2
- data/lib/archieml/loader.rb +70 -37
- data/lib/archieml/version.rb +1 -1
- data/spec/lib/archieml/loader_spec.rb +2 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: db51dfa0cf2524c0161ff7106ef2ab0d6dc38193
|
4
|
+
data.tar.gz: 6caa7995d8f4ed9e281c3c4e48f323b0eacaf69a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e83fe41f68351de40718cca29cbc4651be79287961f5dd5a299f23d1d777f01f006af80bc5faf07211f55a987ed75dc17832ff7a6cf5e98f99d6aa1b5befe377
|
7
|
+
data.tar.gz: 87fa405bd6107a62922da75d67a5eb4c7e4029c0064e94ff8d9ae40ec3f260b23725397ba5e8f48826ab23c69ee00f851cd186eea495823525bb15fb615b0847
|
data/README.md
CHANGED
@@ -4,7 +4,7 @@ Parse Archie Markup Language (ArchieML) documents into Ruby Hashes.
|
|
4
4
|
|
5
5
|
Read about the ArchieML specification at [archieml.org](http://archieml.org).
|
6
6
|
|
7
|
-
The current version is `v0.2.0`.
|
7
|
+
The current version is `v0.3.0`.
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
@@ -58,7 +58,7 @@ client.authorization = flow.authorize
|
|
58
58
|
|
59
59
|
Log into your Google account and authorize the application to access your Google Drive files.
|
60
60
|
|
61
|
-
Now that you have an authenticated `client`, you can make an API call to a document saved in Drive. Create a document with some basic AML inside (such as "key: value"), save it, and note the long string of characters at the end of the URL:
|
61
|
+
Now that you have an authenticated `client`, you can make an API call to a document saved in Drive. Create a document with some basic AML inside (such as "key: value"), save it, and note the long string of characters at the end of the URL:
|
62
62
|
|
63
63
|
`https://docs.google.com/a/nytimes.com/document/d/[FILE_ID]/edit`
|
64
64
|
|
@@ -169,10 +169,13 @@ aml = Archieml.load(html_aml)
|
|
169
169
|
|
170
170
|
## Tests
|
171
171
|
|
172
|
+
Test examples are stored in a submodule. You may need to run `git submodule update --init` to fetch them.
|
173
|
+
|
172
174
|
There is a full test suite using rspec. `bundle install`, and then `rspec` to execute them.
|
173
175
|
|
174
176
|
## Changelog
|
175
177
|
|
178
|
+
* `0.3.0` - Freeform arrays type and unicode.
|
176
179
|
* `0.2.0` - Updated to support an updated ArchieML spec: [2015-05-09](http://archieml.org/spec/1.0/CR-20150509.html). Adds support for nested arrays.
|
177
180
|
* `0.1.1` - More consistent handling of newlines. Fixed bugs around detecting the scope of multi-line values.
|
178
181
|
* `0.1.0` - Initial release supporting the first version of the ArchieML spec, published [2015-03-06](http://archieml.org/spec/1.0/CR-20150306.html).
|
data/lib/archieml/loader.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
module Archieml
|
2
2
|
class Loader
|
3
3
|
|
4
|
-
|
5
|
-
|
4
|
+
WHITESPACE_PATTERN = "\u0000\u0009\u000A\u000B\u000C\u000D\u0020\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u2028\u2029\u202F\u205F\u3000\uFEFF"
|
5
|
+
SLUG_BLACKLIST = "#{WHITESPACE_PATTERN}\u005B\u005C\u005D\u007B\u007D\u003A"
|
6
|
+
|
7
|
+
START_KEY = /^\s*([^#{Regexp.escape(SLUG_BLACKLIST)}]+)[ \t\r]*:[ \t\r]*(.*(?:\n|\r|$))/
|
6
8
|
COMMAND_KEY = /^\s*:[ \t\r]*(endskip|ignore|skip|end)(.*(?:\n|\r|$))/i
|
7
9
|
ARRAY_ELEMENT = /^\s*\*[ \t\r]*(.*(?:\n|\r|$))/
|
8
|
-
SCOPE_PATTERN = /^\s*(\[|\{)[ \t\r]*([
|
10
|
+
SCOPE_PATTERN = /^\s*(\[|\{)[ \t\r]*([\+\.]*)[ \t\r]*([^#{Regexp.escape(SLUG_BLACKLIST)}]*)[ \t\r]*(?:\]|\}).*?(\n|\r|$)/
|
9
11
|
|
10
12
|
def initialize(options = {})
|
11
13
|
@data = @scope = {}
|
@@ -40,10 +42,10 @@ module Archieml
|
|
40
42
|
self.parse_array_element(match[1])
|
41
43
|
|
42
44
|
elsif !@is_skipping && match = line.match(SCOPE_PATTERN)
|
43
|
-
self.parse_scope(match[1], match[2])
|
45
|
+
self.parse_scope(match[1], match[2], match[3])
|
44
46
|
|
45
47
|
else
|
46
|
-
|
48
|
+
self.parse_text(line)
|
47
49
|
end
|
48
50
|
end
|
49
51
|
|
@@ -56,11 +58,12 @@ module Archieml
|
|
56
58
|
|
57
59
|
self.increment_array_element(key)
|
58
60
|
|
61
|
+
key = 'value' if (@stack_scope && @stack_scope[:flags].match(/\+/))
|
62
|
+
|
59
63
|
@buffer_key = key
|
60
64
|
@buffer_string = rest_of_line
|
61
65
|
|
62
66
|
self.flush_buffer_into(key, replace: true)
|
63
|
-
@buffer_key = key
|
64
67
|
end
|
65
68
|
|
66
69
|
def parse_array_element(value)
|
@@ -68,13 +71,10 @@ module Archieml
|
|
68
71
|
|
69
72
|
@stack_scope[:array_type] ||= :simple
|
70
73
|
|
71
|
-
# Ignore simple array elements inside complex arrays
|
72
|
-
return if @stack_scope[:array_type] == :complex
|
73
|
-
|
74
74
|
@stack_scope[:array] << ''
|
75
|
+
@buffer_key = @stack_scope[:array]
|
75
76
|
@buffer_string = value
|
76
77
|
self.flush_buffer_into(@stack_scope[:array], replace: true)
|
77
|
-
@buffer_key = @stack_scope[:array]
|
78
78
|
end
|
79
79
|
|
80
80
|
def parse_command_key(command)
|
@@ -100,47 +100,59 @@ module Archieml
|
|
100
100
|
self.flush_buffer!
|
101
101
|
end
|
102
102
|
|
103
|
-
def parse_scope(scope_type, scope_key)
|
103
|
+
def parse_scope(scope_type, flags, scope_key)
|
104
104
|
self.flush_buffer!
|
105
105
|
|
106
106
|
if scope_key == ''
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
@stack_scope = nil
|
111
|
-
@stack = []
|
112
|
-
when '['
|
113
|
-
# Move up a level
|
114
|
-
if last_stack_item = @stack.pop
|
115
|
-
@scope = last_stack_item[:scope] || @data
|
116
|
-
@stack_scope = @stack.last
|
117
|
-
end
|
118
|
-
end
|
107
|
+
last_stack_item = @stack.pop
|
108
|
+
@scope = (last_stack_item ? last_stack_item[:scope] : @data) || @data
|
109
|
+
@stack_scope = @stack.last
|
119
110
|
|
120
111
|
elsif %w([ {).include?(scope_type)
|
121
112
|
nesting = false
|
122
113
|
key_scope = @data
|
123
114
|
|
124
|
-
if scope_key.match(/^\./)
|
125
|
-
scope_key = scope_key[1..-1]
|
115
|
+
if flags.match(/^\./)
|
126
116
|
self.increment_array_element(scope_key)
|
127
117
|
nesting = true
|
128
118
|
key_scope = @scope if @stack_scope
|
119
|
+
else
|
120
|
+
@scope = @data
|
121
|
+
@stack = []
|
129
122
|
end
|
130
123
|
|
131
|
-
|
132
|
-
|
133
|
-
|
124
|
+
# Within freeforms, the `type` of nested objects and arrays is taken
|
125
|
+
# verbatim from the `keyScope`.
|
126
|
+
if @stack_scope && @stack_scope[:flags].match(/\+/)
|
127
|
+
parsed_scope_key = scope_key
|
128
|
+
|
129
|
+
# Outside of freeforms, dot-notation interpreted as nested data.
|
130
|
+
else
|
131
|
+
key_bits = scope_key.split('.')
|
132
|
+
key_bits[0...-1].each do |bit|
|
133
|
+
key_scope = key_scope[bit] ||= {}
|
134
|
+
end
|
135
|
+
parsed_scope_key = key_bits.last
|
134
136
|
end
|
135
137
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
scope
|
142
|
-
|
138
|
+
# Content of nested scopes within a freeform should be stored under "value."
|
139
|
+
if (@stack_scope && @stack_scope[:flags].match(/\+/) && flags.match(/\./))
|
140
|
+
if scope_type == '['
|
141
|
+
parsed_scope_key = 'value'
|
142
|
+
elsif scope_type == '{'
|
143
|
+
@scope = @scope[:value] = {}
|
144
|
+
end
|
145
|
+
end
|
143
146
|
|
147
|
+
stack_scope_item = {
|
148
|
+
array: nil,
|
149
|
+
array_type: nil,
|
150
|
+
array_first_key: nil,
|
151
|
+
flags: flags,
|
152
|
+
scope: @scope
|
153
|
+
}
|
154
|
+
if scope_type == '['
|
155
|
+
stack_scope_item[:array] = key_scope[parsed_scope_key] = []
|
144
156
|
if nesting
|
145
157
|
@stack << stack_scope_item
|
146
158
|
else
|
@@ -149,11 +161,25 @@ module Archieml
|
|
149
161
|
@stack_scope = @stack.last
|
150
162
|
|
151
163
|
elsif scope_type == '{'
|
152
|
-
|
164
|
+
if nesting
|
165
|
+
@stack << stack_scope_item
|
166
|
+
else
|
167
|
+
@scope = key_scope[parsed_scope_key] = key_scope[parsed_scope_key].is_a?(Hash) ? key_scope[parsed_scope_key] : {}
|
168
|
+
@stack = [stack_scope_item]
|
169
|
+
end
|
170
|
+
@stack_scope = @stack.last
|
153
171
|
end
|
154
172
|
end
|
155
173
|
end
|
156
174
|
|
175
|
+
def parse_text(text)
|
176
|
+
if @stack_scope && @stack_scope[:flags].match(/\+/) && text.match(/[^\n\r\s]/)
|
177
|
+
@stack_scope[:array] << { "type" => "text", "value" => text.gsub(/(^\s*)|(\s*$)/, '') }
|
178
|
+
else
|
179
|
+
@buffer_string += text
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
157
183
|
def increment_array_element(key)
|
158
184
|
# Special handling for arrays. If this is the start of the array, remember
|
159
185
|
# which key was encountered first. If this is a duplicate encounter of
|
@@ -168,7 +194,12 @@ module Archieml
|
|
168
194
|
if @stack_scope[:array_first_key] == nil || @stack_scope[:array_first_key] == key
|
169
195
|
@stack_scope[:array] << (@scope = {})
|
170
196
|
end
|
171
|
-
@stack_scope[:array_first_key] ||= key
|
197
|
+
if (@stack_scope[:flags].match(/\+/))
|
198
|
+
@scope[:type] = key
|
199
|
+
# key = 'content'
|
200
|
+
else
|
201
|
+
@stack_scope[:array_first_key] ||= key
|
202
|
+
end
|
172
203
|
end
|
173
204
|
end
|
174
205
|
|
@@ -180,11 +211,13 @@ module Archieml
|
|
180
211
|
end
|
181
212
|
|
182
213
|
def flush_buffer_into(key, options = {})
|
214
|
+
existing_buffer_key = @buffer_key
|
183
215
|
value = self.flush_buffer!
|
184
216
|
|
185
217
|
if options[:replace]
|
186
218
|
value = self.format_value(value, :replace).sub(/^\s*/, '')
|
187
219
|
@buffer_string = value.match(/\s*\Z/)[0]
|
220
|
+
@buffer_key = existing_buffer_key
|
188
221
|
else
|
189
222
|
value = self.format_value(value, :append)
|
190
223
|
end
|
data/lib/archieml/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: archieml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Strickland
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-06-19 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Parse Archie Markup Language documents
|
14
14
|
email:
|