archieml 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e1769d7385a90df46fb88685e1be8cef959943c1
4
- data.tar.gz: c92bea5829cc1729197ee1035f7398256b3205c8
3
+ metadata.gz: db51dfa0cf2524c0161ff7106ef2ab0d6dc38193
4
+ data.tar.gz: 6caa7995d8f4ed9e281c3c4e48f323b0eacaf69a
5
5
  SHA512:
6
- metadata.gz: 045a8a57c8676be985fd6194c708f09eb157b93ea97ef032d3a88bfe0e4d6431f94924c25e66e0e9265bef9fd54ce07a47a3a2bdbc87e94bbee8d395abc81d00
7
- data.tar.gz: 7bbd0d896a7ae3348814ab36ec6cbe795739ab9543b1971542671d21b629b26b1c15f32b4e7594ff83898c80dec090e3b8bd12e317b92a1023d0748d9742b19d
6
+ metadata.gz: e83fe41f68351de40718cca29cbc4651be79287961f5dd5a299f23d1d777f01f006af80bc5faf07211f55a987ed75dc17832ff7a6cf5e98f99d6aa1b5befe377
7
+ data.tar.gz: 87fa405bd6107a62922da75d67a5eb4c7e4029c0064e94ff8d9ae40ec3f260b23725397ba5e8f48826ab23c69ee00f851cd186eea495823525bb15fb615b0847
data/README.md CHANGED
@@ -4,7 +4,7 @@ Parse Archie Markup Language (ArchieML) documents into Ruby Hashes.
4
4
 
5
5
  Read about the ArchieML specification at [archieml.org](http://archieml.org).
6
6
 
7
- The current version is `v0.2.0`.
7
+ The current version is `v0.3.0`.
8
8
 
9
9
  ## Installation
10
10
 
@@ -58,7 +58,7 @@ client.authorization = flow.authorize
58
58
 
59
59
  Log into your Google account and authorize the application to access your Google Drive files.
60
60
 
61
- Now that you have an authenticated `client`, you can make an API call to a document saved in Drive. Create a document with some basic AML inside (such as "key: value"), save it, and note the long string of characters at the end of the URL:
61
+ Now that you have an authenticated `client`, you can make an API call to a document saved in Drive. Create a document with some basic AML inside (such as "key: value"), save it, and note the long string of characters at the end of the URL:
62
62
 
63
63
  `https://docs.google.com/a/nytimes.com/document/d/[FILE_ID]/edit`
64
64
 
@@ -169,10 +169,13 @@ aml = Archieml.load(html_aml)
169
169
 
170
170
  ## Tests
171
171
 
172
+ Test examples are stored in a submodule. You may need to run `git submodule update --init` to fetch them.
173
+
172
174
  There is a full test suite using rspec. Run `bundle install`, and then `rspec` to execute it.
173
175
 
174
176
  ## Changelog
175
177
 
178
+ * `0.3.0` - Adds support for freeform arrays and for Unicode characters in keys.
176
179
  * `0.2.0` - Updated to support an updated ArchieML spec: [2015-05-09](http://archieml.org/spec/1.0/CR-20150509.html). Adds support for nested arrays.
177
180
  * `0.1.1` - More consistent handling of newlines. Fixed bugs around detecting the scope of multi-line values.
178
181
  * `0.1.0` - Initial release supporting the first version of the ArchieML spec, published [2015-03-06](http://archieml.org/spec/1.0/CR-20150306.html).
@@ -1,11 +1,13 @@
1
1
  module Archieml
2
2
  class Loader
3
3
 
4
- NEXT_LINE = /.*((\r|\n)+)/
5
- START_KEY = /^\s*([A-Za-z0-9\-_\.]+)[ \t\r]*:[ \t\r]*(.*(?:\n|\r|$))/
4
+ WHITESPACE_PATTERN = "\u0000\u0009\u000A\u000B\u000C\u000D\u0020\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u2028\u2029\u202F\u205F\u3000\uFEFF"
5
+ SLUG_BLACKLIST = "#{WHITESPACE_PATTERN}\u005B\u005C\u005D\u007B\u007D\u003A"
6
+
7
+ START_KEY = /^\s*([^#{Regexp.escape(SLUG_BLACKLIST)}]+)[ \t\r]*:[ \t\r]*(.*(?:\n|\r|$))/
6
8
  COMMAND_KEY = /^\s*:[ \t\r]*(endskip|ignore|skip|end)(.*(?:\n|\r|$))/i
7
9
  ARRAY_ELEMENT = /^\s*\*[ \t\r]*(.*(?:\n|\r|$))/
8
- SCOPE_PATTERN = /^\s*(\[|\{)[ \t\r]*([A-Za-z0-9\-_\.]*)[ \t\r]*(?:\]|\}).*?(\n|\r|$)/
10
+ SCOPE_PATTERN = /^\s*(\[|\{)[ \t\r]*([\+\.]*)[ \t\r]*([^#{Regexp.escape(SLUG_BLACKLIST)}]*)[ \t\r]*(?:\]|\}).*?(\n|\r|$)/
9
11
 
10
12
  def initialize(options = {})
11
13
  @data = @scope = {}
@@ -40,10 +42,10 @@ module Archieml
40
42
  self.parse_array_element(match[1])
41
43
 
42
44
  elsif !@is_skipping && match = line.match(SCOPE_PATTERN)
43
- self.parse_scope(match[1], match[2])
45
+ self.parse_scope(match[1], match[2], match[3])
44
46
 
45
47
  else
46
- @buffer_string += line
48
+ self.parse_text(line)
47
49
  end
48
50
  end
49
51
 
@@ -56,11 +58,12 @@ module Archieml
56
58
 
57
59
  self.increment_array_element(key)
58
60
 
61
+ key = 'value' if (@stack_scope && @stack_scope[:flags].match(/\+/))
62
+
59
63
  @buffer_key = key
60
64
  @buffer_string = rest_of_line
61
65
 
62
66
  self.flush_buffer_into(key, replace: true)
63
- @buffer_key = key
64
67
  end
65
68
 
66
69
  def parse_array_element(value)
@@ -68,13 +71,10 @@ module Archieml
68
71
 
69
72
  @stack_scope[:array_type] ||= :simple
70
73
 
71
- # Ignore simple array elements inside complex arrays
72
- return if @stack_scope[:array_type] == :complex
73
-
74
74
  @stack_scope[:array] << ''
75
+ @buffer_key = @stack_scope[:array]
75
76
  @buffer_string = value
76
77
  self.flush_buffer_into(@stack_scope[:array], replace: true)
77
- @buffer_key = @stack_scope[:array]
78
78
  end
79
79
 
80
80
  def parse_command_key(command)
@@ -100,47 +100,59 @@ module Archieml
100
100
  self.flush_buffer!
101
101
  end
102
102
 
103
- def parse_scope(scope_type, scope_key)
103
+ def parse_scope(scope_type, flags, scope_key)
104
104
  self.flush_buffer!
105
105
 
106
106
  if scope_key == ''
107
- case scope_type
108
- when '{'
109
- @scope = @data
110
- @stack_scope = nil
111
- @stack = []
112
- when '['
113
- # Move up a level
114
- if last_stack_item = @stack.pop
115
- @scope = last_stack_item[:scope] || @data
116
- @stack_scope = @stack.last
117
- end
118
- end
107
+ last_stack_item = @stack.pop
108
+ @scope = (last_stack_item ? last_stack_item[:scope] : @data) || @data
109
+ @stack_scope = @stack.last
119
110
 
120
111
  elsif %w([ {).include?(scope_type)
121
112
  nesting = false
122
113
  key_scope = @data
123
114
 
124
- if scope_key.match(/^\./)
125
- scope_key = scope_key[1..-1]
115
+ if flags.match(/^\./)
126
116
  self.increment_array_element(scope_key)
127
117
  nesting = true
128
118
  key_scope = @scope if @stack_scope
119
+ else
120
+ @scope = @data
121
+ @stack = []
129
122
  end
130
123
 
131
- key_bits = scope_key.split('.')
132
- key_bits[0...-1].each do |bit|
133
- key_scope = key_scope[bit] ||= {}
124
+ # Within freeforms, the `type` of nested objects and arrays is taken
125
+ # verbatim from the `keyScope`.
126
+ if @stack_scope && @stack_scope[:flags].match(/\+/)
127
+ parsed_scope_key = scope_key
128
+
129
+ # Outside of freeforms, dot-notation interpreted as nested data.
130
+ else
131
+ key_bits = scope_key.split('.')
132
+ key_bits[0...-1].each do |bit|
133
+ key_scope = key_scope[bit] ||= {}
134
+ end
135
+ parsed_scope_key = key_bits.last
134
136
  end
135
137
 
136
- if scope_type == '['
137
- stack_scope_item = {
138
- array: key_scope[key_bits.last] = [],
139
- array_type: nil,
140
- array_first_key: nil,
141
- scope: @scope
142
- }
138
+ # Content of nested scopes within a freeform should be stored under "value."
139
+ if (@stack_scope && @stack_scope[:flags].match(/\+/) && flags.match(/\./))
140
+ if scope_type == '['
141
+ parsed_scope_key = 'value'
142
+ elsif scope_type == '{'
143
+ @scope = @scope[:value] = {}
144
+ end
145
+ end
143
146
 
147
+ stack_scope_item = {
148
+ array: nil,
149
+ array_type: nil,
150
+ array_first_key: nil,
151
+ flags: flags,
152
+ scope: @scope
153
+ }
154
+ if scope_type == '['
155
+ stack_scope_item[:array] = key_scope[parsed_scope_key] = []
144
156
  if nesting
145
157
  @stack << stack_scope_item
146
158
  else
@@ -149,11 +161,25 @@ module Archieml
149
161
  @stack_scope = @stack.last
150
162
 
151
163
  elsif scope_type == '{'
152
- @scope = key_scope[key_bits.last] = key_scope[key_bits.last].is_a?(Hash) ? key_scope[key_bits.last] : {}
164
+ if nesting
165
+ @stack << stack_scope_item
166
+ else
167
+ @scope = key_scope[parsed_scope_key] = key_scope[parsed_scope_key].is_a?(Hash) ? key_scope[parsed_scope_key] : {}
168
+ @stack = [stack_scope_item]
169
+ end
170
+ @stack_scope = @stack.last
153
171
  end
154
172
  end
155
173
  end
156
174
 
175
+ def parse_text(text)
176
+ if @stack_scope && @stack_scope[:flags].match(/\+/) && text.match(/[^\n\r\s]/)
177
+ @stack_scope[:array] << { "type" => "text", "value" => text.gsub(/(^\s*)|(\s*$)/, '') }
178
+ else
179
+ @buffer_string += text
180
+ end
181
+ end
182
+
157
183
  def increment_array_element(key)
158
184
  # Special handling for arrays. If this is the start of the array, remember
159
185
  # which key was encountered first. If this is a duplicate encounter of
@@ -168,7 +194,12 @@ module Archieml
168
194
  if @stack_scope[:array_first_key] == nil || @stack_scope[:array_first_key] == key
169
195
  @stack_scope[:array] << (@scope = {})
170
196
  end
171
- @stack_scope[:array_first_key] ||= key
197
+ if (@stack_scope[:flags].match(/\+/))
198
+ @scope[:type] = key
199
+ # key = 'content'
200
+ else
201
+ @stack_scope[:array_first_key] ||= key
202
+ end
172
203
  end
173
204
  end
174
205
 
@@ -180,11 +211,13 @@ module Archieml
180
211
  end
181
212
 
182
213
  def flush_buffer_into(key, options = {})
214
+ existing_buffer_key = @buffer_key
183
215
  value = self.flush_buffer!
184
216
 
185
217
  if options[:replace]
186
218
  value = self.format_value(value, :replace).sub(/^\s*/, '')
187
219
  @buffer_string = value.match(/\s*\Z/)[0]
220
+ @buffer_key = existing_buffer_key
188
221
  else
189
222
  value = self.format_value(value, :append)
190
223
  end
@@ -1,3 +1,3 @@
1
1
  module Archieml
2
- VERSION = '0.2.0'
2
+ VERSION = '0.3.0'
3
3
  end
@@ -14,6 +14,8 @@ describe Archieml::Loader do
14
14
  aml.delete('test')
15
15
  aml.delete('result')
16
16
 
17
+ aml = JSON.parse(JSON.dump(aml))
18
+
17
19
  it "#{slug}.#{idx} #{test}" do
18
20
  aml.should == result
19
21
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: archieml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Strickland
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-11 00:00:00.000000000 Z
11
+ date: 2016-06-19 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Parse Archie Markup Language documents
14
14
  email: