clj 0.0.4.5 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/lib/clj/parser.rb +224 -180
  2. data/lib/clj/types.rb +2 -1
  3. metadata +7 -6
data/lib/clj/parser.rb CHANGED
@@ -8,155 +8,260 @@
8
8
  # 0. You just DO WHAT THE FUCK YOU WANT TO.
9
9
  #++
10
10
 
11
- require 'strscan'
11
+ require 'stringio'
12
12
 
13
13
  class Clojure
14
14
 
15
- class Parser < StringScanner
16
- UNPARSED = Object.new
15
+ class Parser
16
+ IGNORE = [" ", ",", "\n", "\r", "\t"]
17
+ SEPARATOR = ['"', '{', '}', '(', ')', '[', ']', '#']
18
+ BOTH = IGNORE + SEPARATOR
19
+ KEYWORD = ["'", '^', '@', '`', '~', '\\', ';'] + BOTH
17
20
 
18
- IGNORE = %r(
19
- (?:
20
- //[^\n\r]*[\n\r]| # line comments
21
- /\* # c-style comments
22
- (?:
23
- [^*/]| # normal chars
24
- /[^*]| # slashes that do not start a nested comment
25
- \*[^/]| # asterisks that do not end this comment
26
- /(?=\*/) # single slash before this comment's end
27
- )*
28
- \*/ # the End of this comment
29
- |[ \t\r\n,]+ # whitespaces: space, horicontal tab, lf, cr, and comma
30
- )
31
- )mx
21
+ # Unescape characters in strings.
22
+ UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr }
23
+ UNESCAPE_MAP.update({
24
+ ?" => '"',
25
+ ?\\ => '\\',
26
+ ?/ => '/',
27
+ ?b => "\b",
28
+ ?f => "\f",
29
+ ?n => "\n",
30
+ ?r => "\r",
31
+ ?t => "\t",
32
+ ?u => nil,
33
+ })
32
34
 
33
- STRING = /" ((?:[^\x0-\x1f"\\] |
34
- # escaped special characters:
35
- \\["\\\/bfnrt] |
36
- \\u[0-9a-fA-F]{4} |
37
- # match all but escaped special characters:
38
- \\[\x20-\x21\x23-\x2e\x30-\x5b\x5d-\x61\x63-\x65\x67-\x6d\x6f-\x71\x73\x75-\xff])*)
39
- "/nx
35
+ EMPTY_8BIT_STRING = ''
40
36
 
41
- KEYWORD = /:([^(\[{'^@`~\"\\,\s;)\]}]+)/
37
+ if EMPTY_8BIT_STRING.respond_to? :force_encoding
38
+ EMPTY_8BIT_STRING.force_encoding Encoding::ASCII_8BIT
39
+ end
42
40
 
43
- INTEGER = /(-?0|-?[1-9]\d*)/
41
+ def initialize (source, options = {})
42
+ @source = source.is_a?(String) ? StringIO.new(source) : source
43
+ @options = options
44
44
 
45
- BIGNUM = /#{INTEGER}N/
45
+ @map_class = options[:map_class] || Hash
46
+ @vector_class = options[:vector_class] || Array
47
+ @list_class = options[:list_class] || Array
48
+ @set_class = options[:set_class] || Array
49
+ end
46
50
 
47
- FLOAT = /(-?
48
- (?:0|[1-9]\d*)
49
- (?:
50
- \.\d+(?i:e[+-]?\d+) |
51
- \.\d+ |
52
- (?i:e[+-]?\d+)
53
- )
54
- )/x
51
+ def parse
52
+ read_next
53
+ end
55
54
 
56
- BIGDECIMAL = /#{FLOAT}M/
55
+ private
56
+ def next_type (ch)
57
+ case ch
58
+ when '0'..'9', '-', '+' then :number
59
+ when 't', 'f' then :boolean
60
+ when 'n' then :nil
61
+ when '\\' then :char
62
+ when ':' then :keyword
63
+ when '"' then :string
64
+ when '{' then :map
65
+ when '(' then :list
66
+ when '[' then :vector
67
+ when '#'
68
+ case read(1)
69
+ when 'i' then :instant
70
+ when '{' then :set
71
+ when '"' then :regexp
72
+ end
73
+ end or raise SyntaxError, 'unknown type'
74
+ end
57
75
 
58
- RATIONAL = /(#{INTEGER}\/#{INTEGER})/
76
+ def read_next
77
+ ch = ignore(false)
59
78
 
60
- REGEXP = /#"((\\.|[^"])+)"/
79
+ raise SyntaxError, 'unexpected EOF' unless ch
61
80
 
62
- INSTANT = /#inst#{IGNORE}*"(.*?)"/
81
+ __send__ "read_#{next_type ch}", ch
82
+ end
63
83
 
64
- VECTOR_OPEN = /\[/
65
- VECTOR_CLOSE = /\]/
84
+ def read_nil (ch)
85
+ unless read(2).bytesize == 2
86
+ raise SyntaxError, 'unexpected EOF'
87
+ end
66
88
 
67
- LIST_OPEN = /\(/
68
- LIST_CLOSE = /\)/
89
+ nil
90
+ end
69
91
 
70
- SET_OPEN = /\#\{/
71
- SET_CLOSE = /\}/
92
+ def read_boolean (ch)
93
+ if ch == 't'
94
+ unless read(3).bytesize == 3
95
+ raise SyntaxError, 'unexpected EOF'
96
+ end
97
+
98
+ true
99
+ else
100
+ unless read(4).bytesize == 4
101
+ raise SyntaxError, 'unexpected EOF'
102
+ end
72
103
 
73
- HASH_OPEN = /\{/
74
- HASH_CLOSE = /\}/
104
+ false
105
+ end
106
+ end
75
107
 
76
- TRUE = /true/
77
- FALSE = /false/
78
- NIL = /nil/
108
+ def read_number (ch)
109
+ piece = ch
79
110
 
80
- def initialize (source, options = {})
81
- super(source)
111
+ while (ch = read(1)) && !BOTH.include?(ch)
112
+ piece << ch
113
+ end
82
114
 
83
- @hash_class = options[:hash_class] || Hash
84
- @vector_class = options[:vector_class] || Array
85
- @list_class = options[:list_class] || Array
86
- @set_class = options[:set_class] || Array
115
+ revert(ch)
116
+
117
+ if piece.include? '/'
118
+ Rational(piece)
119
+ elsif piece.include? '.' or piece.end_with? 'M'
120
+ if piece.end_with? 'M'
121
+ piece[-1] = ''
122
+
123
+ BigDecimal(piece)
124
+ else
125
+ Float(piece)
126
+ end
127
+ else
128
+ if piece.end_with? 'N'
129
+ piece[-1] = ''
130
+ end
131
+
132
+ Integer(piece)
133
+ end
134
+ end
135
+
136
+ def read_char (ch)
137
+ ch = read(1)
138
+
139
+ unescape(if ch == 'u' && lookahead(1) =~ /[0-9a-fA-F]/
140
+ "\\u#{read(4)}"
141
+ else
142
+ ch
143
+ end)
144
+ end
145
+
146
+ def read_keyword (ch)
147
+ result = ''
148
+
149
+ while (ch = read(1)) && !KEYWORD.include?(ch)
150
+ result << ch
151
+ end
152
+
153
+ revert(ch)
154
+
155
+ result.to_sym
87
156
  end
88
157
 
89
- alias source string
158
+ def read_string (ch)
159
+ result = ''
160
+
161
+ while (ch = read(1)) != '"'
162
+ raise SyntaxError, 'unexpected EOF' unless ch
90
163
 
91
- def parsable? (what)
92
- !!case what
93
- when :vector then scan(VECTOR_OPEN)
94
- when :list then scan(LIST_OPEN)
95
- when :set then scan(SET_OPEN)
96
- when :hash then scan(HASH_OPEN)
164
+ result << ch
165
+
166
+ if ch == '\\'
167
+ result << read(1)
168
+ end
97
169
  end
170
+
171
+ unescape(result)
172
+ end
173
+
174
+ def read_instant (ch)
175
+ read(3)
176
+
177
+ DateTime.rfc3339(read_string(ignore(false)))
98
178
  end
99
179
 
100
- def parse (check = true)
101
- reset if check
180
+ def read_regexp (ch)
181
+ result = ''
102
182
 
103
- result = case
104
- when parsable?(:vector) then parse_vector
105
- when parsable?(:list) then parse_list
106
- when parsable?(:set) then parse_set
107
- when parsable?(:hash) then parse_hash
108
- else parse_value
183
+ while (ch = read(1)) != '"'
184
+ raise SyntaxError, 'unexpected EOF' unless ch
185
+
186
+ result << ch
187
+
188
+ if ch == '\\'
189
+ result << read(1)
190
+ end
109
191
  end
110
192
 
111
- if check && result == UNPARSED
112
- raise SyntaxError, 'the string does not contain proper clojure'
193
+ /#{result}/
194
+ end
195
+
196
+ def read_list (ch)
197
+ result = @list_class.new
198
+
199
+ ignore
200
+
201
+ while lookahead(1) != ')'
202
+ result << read_next
203
+ ignore
113
204
  end
114
205
 
206
+ read(1)
207
+
115
208
  result
116
209
  end
117
210
 
118
- def parse_value
119
- case
120
- when scan(RATIONAL) then Rational(self[1])
121
- when scan(BIGDECIMAL) then require 'bigdecimal'; BigDecimal(self[1])
122
- when scan(FLOAT) then Float(self[1])
123
- when scan(BIGNUM) then Integer(self[1])
124
- when scan(INTEGER) then Integer(self[1])
125
- when scan(REGEXP) then /#{self[1]}/
126
- when scan(INSTANT) then DateTime.rfc3339(self[1])
127
- when scan(STRING) then parse_string
128
- when scan(KEYWORD) then self[1].to_sym
129
- when scan(TRUE) then true
130
- when scan(FALSE) then false
131
- when scan(NIL) then nil
132
- else UNPARSED
211
+ def read_vector (ch)
212
+ result = @vector_class.new
213
+
214
+ ignore
215
+
216
+ while lookahead(1) != ']'
217
+ result << read_next
218
+ ignore
133
219
  end
220
+
221
+ read(1)
222
+
223
+ result
134
224
  end
135
225
 
136
- # Unescape characters in strings.
137
- UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr }
138
- UNESCAPE_MAP.update({
139
- ?" => '"',
140
- ?\\ => '\\',
141
- ?/ => '/',
142
- ?b => "\b",
143
- ?f => "\f",
144
- ?n => "\n",
145
- ?r => "\r",
146
- ?t => "\t",
147
- ?u => nil,
148
- })
226
+ def read_set (ch)
227
+ result = @set_class.new
149
228
 
150
- EMPTY_8BIT_STRING = ''
229
+ ignore
151
230
 
152
- if EMPTY_8BIT_STRING.respond_to? :force_encoding
153
- EMPTY_8BIT_STRING.force_encoding Encoding::ASCII_8BIT
231
+ while lookahead(1) != '}'
232
+ result << read_next
233
+ ignore
234
+ end
235
+
236
+ read(1)
237
+
238
+ if result.uniq!
239
+ raise SyntaxError, 'the set contains non unique values'
240
+ end
241
+
242
+ result
154
243
  end
155
244
 
156
- def parse_string
157
- return '' if self[1].empty?
245
+ def read_map (ch)
246
+ result = @map_class.new
247
+
248
+ ignore
249
+
250
+ while lookahead(1) != '}'
251
+ key = read_next
252
+ ignore
253
+ value = read_next
158
254
 
159
- self[1].gsub(%r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n) {|escape|
255
+ result[key] = value
256
+ end
257
+
258
+ read(1)
259
+
260
+ result
261
+ end
262
+
263
+ def unescape (string)
264
+ string.gsub(%r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n) {|escape|
160
265
  if u = UNESCAPE_MAP[$&[1]]
161
266
  next u
162
267
  end
@@ -179,90 +284,29 @@ class Parser < StringScanner
179
284
  }
180
285
  end
181
286
 
182
- def parse_vector
183
- result = @vector_class.new
184
-
185
- until eos?
186
- case
187
- when (value = parse(false)) != UNPARSED
188
- result << value
189
- when scan(VECTOR_CLOSE)
190
- break
191
- when skip(/#{IGNORE}+/)
192
- ;
193
- else
194
- raise SyntaxError, 'wat do'
195
- end
196
- end
197
-
198
- result
287
+ def read (length)
288
+ @source.read(length)
199
289
  end
200
290
 
201
- def parse_list
202
- result = @list_class.new
291
+ def lookahead (length = nil)
292
+ original = @source.tell
293
+ result = @source.read(length)
203
294
 
204
- until eos?
205
- case
206
- when (value = parse(false)) != UNPARSED
207
- result << value
208
- when scan(LIST_CLOSE)
209
- break
210
- when skip(/#{IGNORE}+/)
211
- ;
212
- else
213
- raise SyntaxError, 'wat do'
214
- end
215
- end
295
+ @source.seek(original)
216
296
 
217
297
  result
218
298
  end
219
299
 
220
- def parse_set
221
- result = @set_class.new
300
+ def ignore (ungetc = true)
301
+ while IGNORE.include?(ch = read(1)); end
222
302
 
223
- until eos?
224
- case
225
- when (value = parse(false)) != UNPARSED
226
- result << value
227
- when scan(SET_CLOSE)
228
- break
229
- when skip(/#{IGNORE}+/)
230
- ;
231
- else
232
- raise SyntaxError, 'wat do'
233
- end
234
- end
303
+ return false unless ch
235
304
 
236
- if result.uniq!
237
- raise SyntaxError, 'the set contains non unique values'
238
- end
239
-
240
- result
305
+ ungetc ? revert(ch) : ch
241
306
  end
242
307
 
243
- def parse_hash
244
- result = @hash_class.new
245
-
246
- until eos?
247
- case
248
- when (key = parse(false)) != UNPARSED
249
- skip(/#{IGNORE}*/)
250
-
251
- if (value = parse(false)) == UNPARSED
252
- raise SyntaxError, 'no value for the hash'
253
- end
254
-
255
- result[key] = value
256
- when scan(HASH_CLOSE)
257
- break
258
- when skip(/#{IGNORE}+/)
259
- ;
260
- else
261
- raise SyntaxError, 'wat do'
262
- end
263
- end
264
-
265
- result
308
+ def revert (ch)
309
+ @source.ungetc(ch)
266
310
  end
267
311
  end
268
312
 
data/lib/clj/types.rb CHANGED
@@ -9,6 +9,7 @@
9
9
  #++
10
10
 
11
11
  require 'date'
12
+ require 'bigdecimal'
12
13
 
13
14
  [Numeric, TrueClass, FalseClass, NilClass].each {|klass|
14
15
  klass.instance_eval {
@@ -22,7 +23,7 @@ class Symbol
22
23
  def to_clj (options = {})
23
24
  result = inspect
24
25
 
25
- unless Clojure::Parser::KEYWORD === result
26
+ if Clojure::Parser::KEYWORD.any? { |c| result.include? c }
26
27
  raise ArgumentError, "#{result} cannot be transformed into clojure"
27
28
  end
28
29
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clj
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4.5
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-07 00:00:00.000000000 Z
12
+ date: 2012-02-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &9143400 !ruby/object:Gem::Requirement
16
+ requirement: &10309060 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *9143400
24
+ version_requirements: *10309060
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec
27
- requirement: &9142780 !ruby/object:Gem::Requirement
27
+ requirement: &10307740 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *9142780
35
+ version_requirements: *10307740
36
36
  description:
37
37
  email: meh@paranoici.org
38
38
  executables: []
@@ -67,3 +67,4 @@ signing_key:
67
67
  specification_version: 3
68
68
  summary: Like json, but with clojure sexps.
69
69
  test_files: []
70
+ has_rdoc: