clj 0.0.4.5 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/lib/clj/parser.rb +224 -180
  2. data/lib/clj/types.rb +2 -1
  3. metadata +7 -6
data/lib/clj/parser.rb CHANGED
@@ -8,155 +8,260 @@
8
8
  # 0. You just DO WHAT THE FUCK YOU WANT TO.
9
9
  #++
10
10
 
11
- require 'strscan'
11
+ require 'stringio'
12
12
 
13
13
  class Clojure
14
14
 
15
- class Parser < StringScanner
16
- UNPARSED = Object.new
15
+ class Parser
16
+ IGNORE = [" ", ",", "\n", "\r", "\t"]
17
+ SEPARATOR = ['"', '{', '}', '(', ')', '[', ']', '#']
18
+ BOTH = IGNORE + SEPARATOR
19
+ KEYWORD = ["'", '^', '@', '`', '~', '\\', ';'] + BOTH
17
20
 
18
- IGNORE = %r(
19
- (?:
20
- //[^\n\r]*[\n\r]| # line comments
21
- /\* # c-style comments
22
- (?:
23
- [^*/]| # normal chars
24
- /[^*]| # slashes that do not start a nested comment
25
- \*[^/]| # asterisks that do not end this comment
26
- /(?=\*/) # single slash before this comment's end
27
- )*
28
- \*/ # the End of this comment
29
- |[ \t\r\n,]+ # whitespaces: space, horicontal tab, lf, cr, and comma
30
- )
31
- )mx
21
+ # Unescape characters in strings.
22
+ UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr }
23
+ UNESCAPE_MAP.update({
24
+ ?" => '"',
25
+ ?\\ => '\\',
26
+ ?/ => '/',
27
+ ?b => "\b",
28
+ ?f => "\f",
29
+ ?n => "\n",
30
+ ?r => "\r",
31
+ ?t => "\t",
32
+ ?u => nil,
33
+ })
32
34
 
33
- STRING = /" ((?:[^\x0-\x1f"\\] |
34
- # escaped special characters:
35
- \\["\\\/bfnrt] |
36
- \\u[0-9a-fA-F]{4} |
37
- # match all but escaped special characters:
38
- \\[\x20-\x21\x23-\x2e\x30-\x5b\x5d-\x61\x63-\x65\x67-\x6d\x6f-\x71\x73\x75-\xff])*)
39
- "/nx
35
+ EMPTY_8BIT_STRING = ''
40
36
 
41
- KEYWORD = /:([^(\[{'^@`~\"\\,\s;)\]}]+)/
37
+ if EMPTY_8BIT_STRING.respond_to? :force_encoding
38
+ EMPTY_8BIT_STRING.force_encoding Encoding::ASCII_8BIT
39
+ end
42
40
 
43
- INTEGER = /(-?0|-?[1-9]\d*)/
41
+ def initialize (source, options = {})
42
+ @source = source.is_a?(String) ? StringIO.new(source) : source
43
+ @options = options
44
44
 
45
- BIGNUM = /#{INTEGER}N/
45
+ @map_class = options[:map_class] || Hash
46
+ @vector_class = options[:vector_class] || Array
47
+ @list_class = options[:list_class] || Array
48
+ @set_class = options[:set_class] || Array
49
+ end
46
50
 
47
- FLOAT = /(-?
48
- (?:0|[1-9]\d*)
49
- (?:
50
- \.\d+(?i:e[+-]?\d+) |
51
- \.\d+ |
52
- (?i:e[+-]?\d+)
53
- )
54
- )/x
51
+ def parse
52
+ read_next
53
+ end
55
54
 
56
- BIGDECIMAL = /#{FLOAT}M/
55
+ private
56
+ def next_type (ch)
57
+ case ch
58
+ when '0'..'9', '-', '+' then :number
59
+ when 't', 'f' then :boolean
60
+ when 'n' then :nil
61
+ when '\\' then :char
62
+ when ':' then :keyword
63
+ when '"' then :string
64
+ when '{' then :map
65
+ when '(' then :list
66
+ when '[' then :vector
67
+ when '#'
68
+ case read(1)
69
+ when 'i' then :instant
70
+ when '{' then :set
71
+ when '"' then :regexp
72
+ end
73
+ end or raise SyntaxError, 'unknown type'
74
+ end
57
75
 
58
- RATIONAL = /(#{INTEGER}\/#{INTEGER})/
76
+ def read_next
77
+ ch = ignore(false)
59
78
 
60
- REGEXP = /#"((\\.|[^"])+)"/
79
+ raise SyntaxError, 'unexpected EOF' unless ch
61
80
 
62
- INSTANT = /#inst#{IGNORE}*"(.*?)"/
81
+ __send__ "read_#{next_type ch}", ch
82
+ end
63
83
 
64
- VECTOR_OPEN = /\[/
65
- VECTOR_CLOSE = /\]/
84
+ def read_nil (ch)
85
+ unless read(2).bytesize == 2
86
+ raise SyntaxError, 'unexpected EOF'
87
+ end
66
88
 
67
- LIST_OPEN = /\(/
68
- LIST_CLOSE = /\)/
89
+ nil
90
+ end
69
91
 
70
- SET_OPEN = /\#\{/
71
- SET_CLOSE = /\}/
92
+ def read_boolean (ch)
93
+ if ch == 't'
94
+ unless read(3).bytesize == 3
95
+ raise SyntaxError, 'unexpected EOF'
96
+ end
97
+
98
+ true
99
+ else
100
+ unless read(4).bytesize == 4
101
+ raise SyntaxError, 'unexpected EOF'
102
+ end
72
103
 
73
- HASH_OPEN = /\{/
74
- HASH_CLOSE = /\}/
104
+ false
105
+ end
106
+ end
75
107
 
76
- TRUE = /true/
77
- FALSE = /false/
78
- NIL = /nil/
108
+ def read_number (ch)
109
+ piece = ch
79
110
 
80
- def initialize (source, options = {})
81
- super(source)
111
+ while (ch = read(1)) && !BOTH.include?(ch)
112
+ piece << ch
113
+ end
82
114
 
83
- @hash_class = options[:hash_class] || Hash
84
- @vector_class = options[:vector_class] || Array
85
- @list_class = options[:list_class] || Array
86
- @set_class = options[:set_class] || Array
115
+ revert(ch)
116
+
117
+ if piece.include? '/'
118
+ Rational(piece)
119
+ elsif piece.include? '.' or piece.end_with? 'M'
120
+ if piece.end_with? 'M'
121
+ piece[-1] = ''
122
+
123
+ BigDecimal(piece)
124
+ else
125
+ Float(piece)
126
+ end
127
+ else
128
+ if piece.end_with? 'N'
129
+ piece[-1] = ''
130
+ end
131
+
132
+ Integer(piece)
133
+ end
134
+ end
135
+
136
+ def read_char (ch)
137
+ ch = read(1)
138
+
139
+ unescape(if ch == 'u' && lookahead(1) =~ /[0-9a-fA-F]/
140
+ "\\u#{read(4)}"
141
+ else
142
+ ch
143
+ end)
144
+ end
145
+
146
+ def read_keyword (ch)
147
+ result = ''
148
+
149
+ while (ch = read(1)) && !KEYWORD.include?(ch)
150
+ result << ch
151
+ end
152
+
153
+ revert(ch)
154
+
155
+ result.to_sym
87
156
  end
88
157
 
89
- alias source string
158
+ def read_string (ch)
159
+ result = ''
160
+
161
+ while (ch = read(1)) != '"'
162
+ raise SyntaxError, 'unexpected EOF' unless ch
90
163
 
91
- def parsable? (what)
92
- !!case what
93
- when :vector then scan(VECTOR_OPEN)
94
- when :list then scan(LIST_OPEN)
95
- when :set then scan(SET_OPEN)
96
- when :hash then scan(HASH_OPEN)
164
+ result << ch
165
+
166
+ if ch == '\\'
167
+ result << read(1)
168
+ end
97
169
  end
170
+
171
+ unescape(result)
172
+ end
173
+
174
+ def read_instant (ch)
175
+ read(3)
176
+
177
+ DateTime.rfc3339(read_string(ignore(false)))
98
178
  end
99
179
 
100
- def parse (check = true)
101
- reset if check
180
+ def read_regexp (ch)
181
+ result = ''
102
182
 
103
- result = case
104
- when parsable?(:vector) then parse_vector
105
- when parsable?(:list) then parse_list
106
- when parsable?(:set) then parse_set
107
- when parsable?(:hash) then parse_hash
108
- else parse_value
183
+ while (ch = read(1)) != '"'
184
+ raise SyntaxError, 'unexpected EOF' unless ch
185
+
186
+ result << ch
187
+
188
+ if ch == '\\'
189
+ result << read(1)
190
+ end
109
191
  end
110
192
 
111
- if check && result == UNPARSED
112
- raise SyntaxError, 'the string does not contain proper clojure'
193
+ /#{result}/
194
+ end
195
+
196
+ def read_list (ch)
197
+ result = @list_class.new
198
+
199
+ ignore
200
+
201
+ while lookahead(1) != ')'
202
+ result << read_next
203
+ ignore
113
204
  end
114
205
 
206
+ read(1)
207
+
115
208
  result
116
209
  end
117
210
 
118
- def parse_value
119
- case
120
- when scan(RATIONAL) then Rational(self[1])
121
- when scan(BIGDECIMAL) then require 'bigdecimal'; BigDecimal(self[1])
122
- when scan(FLOAT) then Float(self[1])
123
- when scan(BIGNUM) then Integer(self[1])
124
- when scan(INTEGER) then Integer(self[1])
125
- when scan(REGEXP) then /#{self[1]}/
126
- when scan(INSTANT) then DateTime.rfc3339(self[1])
127
- when scan(STRING) then parse_string
128
- when scan(KEYWORD) then self[1].to_sym
129
- when scan(TRUE) then true
130
- when scan(FALSE) then false
131
- when scan(NIL) then nil
132
- else UNPARSED
211
+ def read_vector (ch)
212
+ result = @vector_class.new
213
+
214
+ ignore
215
+
216
+ while lookahead(1) != ']'
217
+ result << read_next
218
+ ignore
133
219
  end
220
+
221
+ read(1)
222
+
223
+ result
134
224
  end
135
225
 
136
- # Unescape characters in strings.
137
- UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr }
138
- UNESCAPE_MAP.update({
139
- ?" => '"',
140
- ?\\ => '\\',
141
- ?/ => '/',
142
- ?b => "\b",
143
- ?f => "\f",
144
- ?n => "\n",
145
- ?r => "\r",
146
- ?t => "\t",
147
- ?u => nil,
148
- })
226
+ def read_set (ch)
227
+ result = @set_class.new
149
228
 
150
- EMPTY_8BIT_STRING = ''
229
+ ignore
151
230
 
152
- if EMPTY_8BIT_STRING.respond_to? :force_encoding
153
- EMPTY_8BIT_STRING.force_encoding Encoding::ASCII_8BIT
231
+ while lookahead(1) != '}'
232
+ result << read_next
233
+ ignore
234
+ end
235
+
236
+ read(1)
237
+
238
+ if result.uniq!
239
+ raise SyntaxError, 'the set contains non unique values'
240
+ end
241
+
242
+ result
154
243
  end
155
244
 
156
- def parse_string
157
- return '' if self[1].empty?
245
+ def read_map (ch)
246
+ result = @map_class.new
247
+
248
+ ignore
249
+
250
+ while lookahead(1) != '}'
251
+ key = read_next
252
+ ignore
253
+ value = read_next
158
254
 
159
- self[1].gsub(%r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n) {|escape|
255
+ result[key] = value
256
+ end
257
+
258
+ read(1)
259
+
260
+ result
261
+ end
262
+
263
+ def unescape (string)
264
+ string.gsub(%r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n) {|escape|
160
265
  if u = UNESCAPE_MAP[$&[1]]
161
266
  next u
162
267
  end
@@ -179,90 +284,29 @@ class Parser < StringScanner
179
284
  }
180
285
  end
181
286
 
182
- def parse_vector
183
- result = @vector_class.new
184
-
185
- until eos?
186
- case
187
- when (value = parse(false)) != UNPARSED
188
- result << value
189
- when scan(VECTOR_CLOSE)
190
- break
191
- when skip(/#{IGNORE}+/)
192
- ;
193
- else
194
- raise SyntaxError, 'wat do'
195
- end
196
- end
197
-
198
- result
287
+ def read (length)
288
+ @source.read(length)
199
289
  end
200
290
 
201
- def parse_list
202
- result = @list_class.new
291
+ def lookahead (length = nil)
292
+ original = @source.tell
293
+ result = @source.read(length)
203
294
 
204
- until eos?
205
- case
206
- when (value = parse(false)) != UNPARSED
207
- result << value
208
- when scan(LIST_CLOSE)
209
- break
210
- when skip(/#{IGNORE}+/)
211
- ;
212
- else
213
- raise SyntaxError, 'wat do'
214
- end
215
- end
295
+ @source.seek(original)
216
296
 
217
297
  result
218
298
  end
219
299
 
220
- def parse_set
221
- result = @set_class.new
300
+ def ignore (ungetc = true)
301
+ while IGNORE.include?(ch = read(1)); end
222
302
 
223
- until eos?
224
- case
225
- when (value = parse(false)) != UNPARSED
226
- result << value
227
- when scan(SET_CLOSE)
228
- break
229
- when skip(/#{IGNORE}+/)
230
- ;
231
- else
232
- raise SyntaxError, 'wat do'
233
- end
234
- end
303
+ return false unless ch
235
304
 
236
- if result.uniq!
237
- raise SyntaxError, 'the set contains non unique values'
238
- end
239
-
240
- result
305
+ ungetc ? revert(ch) : ch
241
306
  end
242
307
 
243
- def parse_hash
244
- result = @hash_class.new
245
-
246
- until eos?
247
- case
248
- when (key = parse(false)) != UNPARSED
249
- skip(/#{IGNORE}*/)
250
-
251
- if (value = parse(false)) == UNPARSED
252
- raise SyntaxError, 'no value for the hash'
253
- end
254
-
255
- result[key] = value
256
- when scan(HASH_CLOSE)
257
- break
258
- when skip(/#{IGNORE}+/)
259
- ;
260
- else
261
- raise SyntaxError, 'wat do'
262
- end
263
- end
264
-
265
- result
308
+ def revert (ch)
309
+ @source.ungetc(ch)
266
310
  end
267
311
  end
268
312
 
data/lib/clj/types.rb CHANGED
@@ -9,6 +9,7 @@
9
9
  #++
10
10
 
11
11
  require 'date'
12
+ require 'bigdecimal'
12
13
 
13
14
  [Numeric, TrueClass, FalseClass, NilClass].each {|klass|
14
15
  klass.instance_eval {
@@ -22,7 +23,7 @@ class Symbol
22
23
  def to_clj (options = {})
23
24
  result = inspect
24
25
 
25
- unless Clojure::Parser::KEYWORD === result
26
+ if Clojure::Parser::KEYWORD.any? { |c| result.include? c }
26
27
  raise ArgumentError, "#{result} cannot be transformed into clojure"
27
28
  end
28
29
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clj
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4.5
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-07 00:00:00.000000000 Z
12
+ date: 2012-02-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &9143400 !ruby/object:Gem::Requirement
16
+ requirement: &10309060 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *9143400
24
+ version_requirements: *10309060
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec
27
- requirement: &9142780 !ruby/object:Gem::Requirement
27
+ requirement: &10307740 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *9142780
35
+ version_requirements: *10307740
36
36
  description:
37
37
  email: meh@paranoici.org
38
38
  executables: []
@@ -67,3 +67,4 @@ signing_key:
67
67
  specification_version: 3
68
68
  summary: Like json, but with clojure sexps.
69
69
  test_files: []
70
+ has_rdoc: