farleyknight-ionize 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. data/README.rdoc +59 -0
  2. data/Rakefile +5 -0
  3. data/bin/ionize +47 -0
  4. data/lib/ionize.rb +75 -0
  5. data/lib/ionize/environment.rb +56 -0
  6. data/lib/ionize/environment/application.rb +58 -0
  7. data/lib/ionize/environment/php_array.rb +95 -0
  8. data/lib/ionize/parser.rb +272 -0
  9. data/lib/ionize/tokenizer.rb +544 -0
  10. data/lib/ionize/translate.rb +34 -0
  11. data/lib/ionize/translate/composite_string_statements.rb +79 -0
  12. data/lib/ionize/translate/debug.rb +16 -0
  13. data/lib/ionize/translate/ext.rb +47 -0
  14. data/lib/ionize/translate/function_args.rb +132 -0
  15. data/lib/ionize/translate/if_statements.rb +42 -0
  16. data/lib/ionize/translate/multiple_statements.rb +22 -0
  17. data/lib/ionize/translate/php_to_ruby.rb +40 -0
  18. data/lib/ionize/translate/rails_for_php.rb +191 -0
  19. data/lib/ionize/translate/rewritable.rb +133 -0
  20. data/lib/ionize/translate/rewrites.rb +51 -0
  21. data/lib/ionize/translate/statements.rb +622 -0
  22. data/lib/ionize/translate/switch_case_statements.rb +52 -0
  23. data/lib/ionize/translate/term_statements.rb +76 -0
  24. data/lib/ionize/translate/translator.rb +52 -0
  25. data/lib/ionize/version.rb +9 -0
  26. data/spec/fixtures/array_lookup.php +10 -0
  27. data/spec/fixtures/boolean_operators.php +5 -0
  28. data/spec/fixtures/boolean_operators.rb +6 -0
  29. data/spec/fixtures/class_def.php +34 -0
  30. data/spec/fixtures/class_def.rb +34 -0
  31. data/spec/fixtures/dangling_else.php +8 -0
  32. data/spec/fixtures/dangling_else.rb +12 -0
  33. data/spec/fixtures/drupal_1.php +663 -0
  34. data/spec/fixtures/drupal_2.php +1152 -0
  35. data/spec/fixtures/empty_string.php +12 -0
  36. data/spec/fixtures/for_loop.php +17 -0
  37. data/spec/fixtures/for_loop2.php +13 -0
  38. data/spec/fixtures/for_loop3.php +16 -0
  39. data/spec/fixtures/for_loop3.rb +17 -0
  40. data/spec/fixtures/for_loop4.php +5 -0
  41. data/spec/fixtures/for_loop4.rb +6 -0
  42. data/spec/fixtures/foreach.php +9 -0
  43. data/spec/fixtures/foreach2.php +8 -0
  44. data/spec/fixtures/foreach3.php +7 -0
  45. data/spec/fixtures/foreach3.rb +7 -0
  46. data/spec/fixtures/fun_def.php +9 -0
  47. data/spec/fixtures/fun_def2.php +30 -0
  48. data/spec/fixtures/fun_def2.rb +30 -0
  49. data/spec/fixtures/fun_def3.php +33 -0
  50. data/spec/fixtures/fun_def4.php +43 -0
  51. data/spec/fixtures/fun_def4.rb +37 -0
  52. data/spec/fixtures/fun_def5.php +36 -0
  53. data/spec/fixtures/fun_with_if.php +6 -0
  54. data/spec/fixtures/fun_with_if.rb +6 -0
  55. data/spec/fixtures/fun_with_ifs.php +12 -0
  56. data/spec/fixtures/fun_with_ifs.rb +14 -0
  57. data/spec/fixtures/hello_world.php +6 -0
  58. data/spec/fixtures/heredoc.php +6 -0
  59. data/spec/fixtures/heredoc.rb +5 -0
  60. data/spec/fixtures/if.php +6 -0
  61. data/spec/fixtures/if.rb +7 -0
  62. data/spec/fixtures/if_boolean.php +5 -0
  63. data/spec/fixtures/if_boolean.rb +5 -0
  64. data/spec/fixtures/if_else.php +11 -0
  65. data/spec/fixtures/if_else1.php +17 -0
  66. data/spec/fixtures/if_else2.php +8 -0
  67. data/spec/fixtures/if_else3.php +15 -0
  68. data/spec/fixtures/if_else_nested.php +14 -0
  69. data/spec/fixtures/if_else_nested.rb +15 -0
  70. data/spec/fixtures/if_else_series.php +12 -0
  71. data/spec/fixtures/if_else_series.rb +12 -0
  72. data/spec/fixtures/if_not.php +5 -0
  73. data/spec/fixtures/if_not.rb +5 -0
  74. data/spec/fixtures/if_with_brackets.php +7 -0
  75. data/spec/fixtures/if_with_brackets.rb +7 -0
  76. data/spec/fixtures/long_if_else.php +10 -0
  77. data/spec/fixtures/long_if_else.rb +9 -0
  78. data/spec/fixtures/oo.php +16 -0
  79. data/spec/fixtures/php_nuke/sql_layer.php +527 -0
  80. data/spec/fixtures/postop.php +3 -0
  81. data/spec/fixtures/preop.php +7 -0
  82. data/spec/fixtures/simple_fun_def.php +4 -0
  83. data/spec/fixtures/switch_case.php +13 -0
  84. data/spec/fixtures/switch_case.rb +14 -0
  85. data/spec/fixtures/switch_case2.php +25 -0
  86. data/spec/fixtures/switch_case3.php +40 -0
  87. data/spec/fixtures/switch_case3.rb +42 -0
  88. data/spec/fixtures/switch_case4.php +56 -0
  89. data/spec/fixtures/switch_case5.php +71 -0
  90. data/spec/fixtures/switch_case_with_rescue_nil.php +43 -0
  91. data/spec/fixtures/switch_case_with_rescue_nil.rb +35 -0
  92. data/spec/fixtures/tertiary.php +3 -0
  93. data/spec/helper.rb +17 -0
  94. data/spec/php_environment_spec.rb +83 -0
  95. data/spec/php_parser_spec.rb +121 -0
  96. data/spec/php_translator_spec.rb +358 -0
  97. data/spec/rails_for_php_spec.rb +303 -0
  98. metadata +191 -0
@@ -0,0 +1,544 @@
1
+ require 'rubygems'
2
+ require 'dhaka'
3
+
4
+ module Ionize
5
+ module Php
6
+ class Tokenizer < Dhaka::Tokenizer
7
# Character classes used when registering tokenizer handlers. Every class
# is an array of one-character strings so classes can be combined with
# + and - further down in this class body.
digits  = Array('0'..'9')
letters = ('A'..'Z').to_a.concat(('a'..'z').to_a)

whitespace = [' ']
dollar     = %w[$]
underscore = %w[_]
percent    = %w[%]
slash      = %w[/]
backslash  = ["\\"]
star       = %w[*]
newline    = ["\n"]
tab        = ["\t"]
squotes    = ["'"]
dquotes    = ['"']
dot        = %w[.]
equals     = %w[=]
plus       = %w[+]
minus      = %w[-]
at         = %w[@]
pipe       = %w[|]
greater    = %w[>]
less_than  = %w[<]
caret      = %w[^]
ampersand  = %w[&]

# Punctuation that is emitted as a token named after the character itself.
Symbols = %w|[ ] ( ) { } : ; , ! -|

# Every character the whitelist-based states accept. The groups are kept
# in their historical order (so '-' appears twice: once via Symbols and
# once via minus).
AllCharacters = [
  digits, Symbols, whitespace, star,
  underscore, squotes, dquotes, letters, newline, dot,
  tab, equals, dollar, plus, minus, at,
  pipe, greater, less_than, percent, ampersand,
  ["^", "?", '#', "\r", "/", "\\"]
].inject([]) { |all, group| all + group }

# Words that get their own token type; compared against the lower-cased
# word in the :word state.
Keywords = %w[
  as and break case class default do echo
  extends else exit for foreach function global
  if include include_once list new or print
  protected private require require_once return
  static switch var while
]

Whitespace = [" ", "\t", "\r", "\n"]

# Characters that may appear inside a word, and the characters that are
# registered as ending one.
Identifiers    = [letters, underscore, digits].inject([]) { |all, group| all + group }
NonIdentifiers = Whitespace + %w[" ' > < =] + Symbols +
                 ["^", "+", "-", "|", ".", "/", "&", "\n", "\\", "$"]
55
+
56
+ # TODO: Adding these methods might be a sign that I should
57
+ # switch to the Lexeme approach
58
+
59
+ # Just have to figure out Dhaka's own regex engine
60
+
61
# Returns the character one position after the cursor as a one-character
# string, or nil when the input has no character there.
def peek
  upcoming = @input[@curr_char_index + 1]
  upcoming && upcoming.chr
end
64
+
65
# Returns the character two positions after the cursor as a one-character
# string, or nil when the input has no character there.
def double_peek
  second_ahead = @input[@curr_char_index + 2]
  second_ahead && second_ahead.chr
end
68
+
69
# Returns the character immediately before the cursor as a one-character
# string, or nil when the cursor is at the very start of the input.
#
# The explicit bounds check matters: a negative index passed to
# String#[] counts from the END of the string, so without it the first
# position would report the input's last character as its predecessor.
def reverse_peek
  return nil if @curr_char_index < 1

  previous = @input[@curr_char_index - 1]
  previous && previous.chr
end
72
+
73
# Returns the character two positions before the cursor as a
# one-character string, or nil when fewer than two characters precede
# the cursor.
#
# The explicit bounds check matters: a negative index passed to
# String#[] counts from the END of the string, so without it positions
# 0 and 1 would report characters from the tail of the input.
def double_reverse_peek
  return nil if @curr_char_index < 2

  before_previous = @input[@curr_char_index - 2]
  before_previous && before_previous.chr
end
76
+
77
# Emits a token whose symbol name and value are both +keyword+, then
# advances the cursor once per character of the keyword.
def create_keyword(keyword)
  create_token(keyword, keyword)
  keyword.size.times { advance }
end
81
+
82
# Emits a 'cast' token whose value is +cast+, then advances the cursor
# once per character of the cast name.
def create_cast(cast)
  create_token('cast', cast)
  cast.size.times { advance }
end
86
+
87
# True when the input, starting at the cursor, begins with exactly
# +keyword+ (case-sensitive).
def exactly_matches?(keyword)
  window = @input[@curr_char_index, keyword.size]
  window == keyword
end
90
+
91
# Idle state: whatever character comes next, open an empty html_string
# token and let the :html state consume it (the cursor is not advanced
# here).
for_state :idle_state do
  for_default do
    create_token("html_string", "") && switch_to(:html)
  end
end
96
+
97
# Literal HTML outside of PHP tags: characters are appended to the
# current token until a PHP opening tag is found.
for_state :html do
  for_default do
    curr_token.value << curr_char and advance
  end

  # The character list is passed as an Array, like every other
  # registration in this class, rather than as a bare String.
  for_characters ["<"] do
    # Accept the upper-case spelling too, matching what the :php state's
    # own "<" handler already recognises. The emitted token value is
    # always the lower-case "<?php".
    if exactly_matches? "<?php" or exactly_matches? "<?PHP"
      create_token "php_start", "<?php" and 5.times { advance }
      switch_to :php
    else
      # Just an ordinary "<" in the markup
      curr_token.value << curr_char and advance
    end
  end
end
111
+
112
# Main PHP state. Punctuation and operators are turned into tokens here;
# numbers, words, strings, comments, heredocs and "#!" lines are handed
# to their dedicated states.
for_state :php do
  # Each character in Symbols becomes a token named after itself.
  # ":", ";", "!" and "-" are registered again below with more specific
  # handlers (presumably the later registration wins -- confirm against
  # Dhaka).
  for_characters Symbols do
    create_token curr_char, curr_char and advance
  end

  # "::" is a single token; a lone ":" is emitted as itself
  for_characters [":"] do
    if peek == ":"
      create_token "::", "::" and 2.times { advance }
    else
      create_token curr_char, curr_char and advance
    end
  end

  for_characters [";"] do
    create_token curr_char, curr_char and advance
    # Double semicolons ";;" sometimes show up; the second one is
    # skipped without producing a token
    advance if curr_char == ";"
  end

  # "//" and "/*" open comments; a lone "/" is the division operator
  for_characters ["/"] do
    if peek == '/'
      2.times { advance }
      switch_to :comment
    elsif peek == '*'
      2.times { advance }
      switch_to :multiline_comment
    else
      create_token 'op', '/'
      advance
    end
  end

  # "#!" opens an interpreter directive; any other "#" opens a comment
  # (the "#" itself is left for the :comment state to consume)
  for_characters ['#'] do
    if peek == '!'
      switch_to :unix_interpreter_directive and 2.times { advance }
    else
      switch_to :comment
    end
  end

  # "?>" closes the PHP section; a lone "?" is emitted as itself
  for_characters ['?'] do
    if peek == ">"
      create_token "php_end", "?>" and 2.times { advance }
      switch_to :idle_state
    else
      create_token '?', '?' and advance
    end
  end

  for_characters ['@'] do
    create_token '@', '@' and advance
  end

  # "!==" is a generic op, "!=" and "!" have their own token types
  for_characters ["!"] do
    if peek == "="
      if double_peek == "="
        create_token "op", "!==" and 3.times { advance }
      else
        create_token "!=", "!=" and 2.times { advance }
      end
    else
      create_token "!", "!" and advance
    end
  end

  for_character ["|"] do
    if peek == "|"
      create_token "||", "||" and 2.times { advance }
    else
      create_token "op", "|" and advance
    end
  end

  for_characters ["+"] do
    if peek == "+"
      create_token "++", "++" and 2.times { advance }
    elsif peek == "="
      create_token "op", "+=" and 2.times { advance }
    else
      create_token "+", "+" and advance
    end
  end

  for_characters ["-"] do
    if peek == "-"
      create_token "--", "--" and 2.times { advance }
    elsif peek == ">"
      create_token "->", "->" and 2.times { advance }
    elsif peek == "="
      create_token "op", "-=" and 2.times { advance }
    else
      create_token "-", "-" and advance
    end
  end

  for_characters ['%', '*'] do
    create_token('op', curr_char) and advance
  end

  # "<" may start a PHP open tag, "<>", a heredoc ("<<<"), "<=" or be
  # a plain less-than
  for_characters ['<'] do
    if exactly_matches? "<?php" or exactly_matches? "<?PHP"
      create_token "php_start", "<?php" and 5.times { advance }
    elsif peek == ">"
      create_token "<>", "<>" and 2.times { advance }
    elsif exactly_matches? "<<<"
      3.times { advance } and switch_to :heredoc
    elsif peek == '='
      create_token "op", "<=" and 2.times { advance }
    else
      create_token "<", "<" and advance
    end
  end

  for_characters [">"] do
    if peek == '='
      create_token "op", ">=" and 2.times { advance }
    else
      create_token ">", ">" and advance
    end
  end

  for_character ['.'] do
    if peek == "="
      create_token('op', '.=') and 2.times { advance }
    else
      create_token('.', '.') and advance
    end
  end

  # "===" and "==" are generic ops; "=>" and "=" have their own types
  for_character ['='] do
    if peek == '='
      if double_peek == '='
        create_token('op', '===') and 3.times { advance }
      else
        create_token('op', '==') and 2.times { advance }
      end
    elsif peek == '>'
      create_token('=>', '=>') and 2.times { advance }
    else
      create_token('=', '=') and advance
    end
  end

  for_character ['&'] do
    if peek == '&'
      create_token('&&', '&&') and 2.times { advance }
    else
      create_token('&', '&') and advance
    end
  end

  for_character ['^'] do
    create_token('op', '^') and advance
  end

  # Open an empty num token; the :number state collects the digits
  for_characters digits do
    create_token 'num', ''
    switch_to :number
  end

  # Variables ("$x" or "$$x"): the sigil(s) seed the token value and
  # the :word state appends the name
  for_character ['$'] do
    if peek == '$'
      create_token('variable', '$$') and 2.times { advance }
    else
      create_token('variable', '$') and advance
    end
    switch_to :word
  end

  # Open an empty word token; the :word state collects the characters
  for_character letters + underscore do
    create_token 'word', ''
    switch_to :word
  end

  for_character ['"'] do
    create_token('double_quoted_string', '') and advance
    switch_to :double_string
  end

  for_character ["'"] do
    create_token('single_quoted_string', "") and advance
    switch_to :single_string
  end

  # Whitespace between tokens is skipped
  for_characters [' ', "\t", "\r"] do
    advance
  end

  # Dhaka seems to be introducing literal newlines? A backslash
  # followed by "n" is skipped as a pair. Any other backslash is now
  # skipped on its own: the previous handler returned without advancing
  # in that case, which would leave the cursor on the backslash
  # (presumably re-dispatching the same handler forever -- confirm
  # against Dhaka's tokenize loop).
  for_characters ["\\"] do
    if peek == "n"
      2.times { advance }
    else
      advance
    end
  end

  for_characters ["\n"] do
    advance
  end
end
312
+
313
# Collects the digits of an integer literal into the current num token.
for_state :number do
  for_characters digits do
    curr_token.value << curr_char and advance
  end

  # Any non-digit ends the literal and is handed back to the :php state
  # without being consumed. This used to be registered only for Symbols
  # and a plain space, which left input such as "1+2", "1<2" or a
  # number at the end of a line with no handler at all.
  for_default do
    switch_to :php
  end
end
322
+
323
# Type names that the :word state re-labels as 'cast' tokens
# (matched case-sensitively).
casts = %w[object int integer bool boolean float double real]
324
+
325
# Skips the remainder of a "#!" interpreter line without producing any
# tokens.
for_state :unix_interpreter_directive do
  # Every character up to the newline is discarded. This used to be
  # registered only for letters, Symbols, a space and "/", which left
  # paths such as "/usr/bin/php5" or "/usr/local/bin/php_cli" (digits,
  # ".", "_", tabs) with no handler.
  for_default do
    advance
  end

  # The newline itself is left for the :php state to consume
  for_characters ["\n"] do
    switch_to :php
  end
end
334
+
335
# Collects identifier characters into the current token (a word, or a
# variable whose value already starts with "$") and classifies the
# result as a keyword, cast, boolean constant or plain word.
for_state :word do
  for_characters Identifiers do
    curr_token.value << curr_char
    # The word is complete when the next character cannot extend it
    # (peek returns nil at the end of the input).
    word_complete = !Identifiers.include?(peek)
    # TODO: Get rid of this or turn it into a case/when
    if curr_token.value == "else"
      # This is the special case where we want to parse:
      #   elseif
      # as
      #   else if
      # As this is easier on our grammar.
      # NOTE(review): this also splits any other word that merely
      # starts with "else" -- left unchanged here.
      curr_token.symbol_name = "else"
      switch_to :php and advance
    elsif word_complete && curr_token.value =~ /\Afalse\z/i
      # We want to parse the special constant 'false' (any case) as
      # separate from other words. The match is anchored and only made
      # once the word is complete, so identifiers that merely contain
      # "false" (falsetto, $is_false) are no longer cut in two.
      curr_token.symbol_name = "false"
      switch_to :php and advance
    elsif word_complete && curr_token.value =~ /\Atrue\z/i
      # Same treatment for the special constant 'true'
      curr_token.symbol_name = "true"
      switch_to :php and advance
    else
      # Otherwise we just advance to the next thing
      advance
    end
  end

  # A non-identifier character ends the word: classify it and hand the
  # character back to the :php state without consuming it.
  for_characters NonIdentifiers do
    if match = Keywords.detect {|kw| curr_token.value.downcase == kw }
      curr_token.symbol_name = match
    elsif match = casts.detect {|cast| curr_token.value == cast }
      curr_token.symbol_name = "cast"
    elsif curr_token.value == "and"
      # NOTE(review): "and" is listed in Keywords, so the first branch
      # already claims it and this branch looks unreachable -- confirm
      # which token type was intended.
      curr_token.symbol_name = "op"
    end
    switch_to :php
  end
end
376
+
377
# Inside "$var[...]" within a double-quoted string: characters are
# appended to the current token until the closing bracket.
for_state :array_lookup_within_double_string do
  for_default do
    (curr_token.value << curr_char) && advance
  end

  # The "]" is kept as part of the lookup token; an empty string node is
  # then opened for whatever follows in the string.
  for_characters ["]"] do
    (curr_token.value << curr_char) && advance
    create_token("double_quoted_string_node", "")
    switch_to(:double_string)
  end
end
388
+
389
# Inside "{$...}" within a double-quoted string: characters are appended
# to the current token until the closing brace.
for_state :braced_variable_within_double_string do
  for_default do
    (curr_token.value << curr_char) && advance
  end

  # The "}" is consumed without being stored; an empty string node is
  # opened for whatever follows in the string.
  for_characters ["}"] do
    create_token("double_quoted_string_node", "") && advance
    switch_to(:double_string)
  end
end
399
+
400
# Inside a bare "$var" interpolation within a double-quoted string.
# Registration order is kept as-is because several character lists
# below overlap ("$" and '"' each appear in more than one).
for_state :variable_within_double_string do
  # Identifier characters extend the variable token
  for_characters Identifiers + ["$"] do
    (curr_token.value << curr_char) && advance
  end

  # "[" turns the variable into an array lookup
  for_characters ["["] do
    curr_token.symbol_name = "array_lookup_node"
    (curr_token.value << curr_char) && advance
    switch_to(:array_lookup_within_double_string)
  end

  # Any other non-identifier character ends the variable and becomes the
  # first character of a new string node
  for_characters NonIdentifiers - ["["] do
    create_token("double_quoted_string_node", curr_char) && advance
    switch_to(:double_string)
  end

  # A quote is kept only when it is preceded by exactly one backslash;
  # otherwise (no backslash, or a doubled backslash) it closes the string.
  for_characters ['"'] do
    quote_is_escaped = reverse_peek == "\\" && double_reverse_peek != "\\"
    if quote_is_escaped
      (curr_token.value << curr_char) && advance
    else
      advance && switch_to(:php)
    end
  end
end
428
+
429
# Body of a double-quoted string. Plain characters are appended to the
# current token; "{$" and "$" start interpolated variables.
for_state :double_string do
  for_default do
    (curr_token.value << curr_char) && advance
  end

  # "{$" opens a braced variable; any other "{" is ordinary text
  for_characters ["{"] do
    if peek != "$"
      (curr_token.value << curr_char) && advance
    else
      # The first interpolation re-labels the string token as "open"
      if curr_token.symbol_name == "double_quoted_string"
        curr_token.symbol_name = "open_double_quoted_string"
      end
      create_token("variable", "") && advance
      switch_to(:braced_variable_within_double_string)
    end
  end

  # "$" opens a bare variable; the sigil seeds the variable token
  for_characters ["$"] do
    if curr_token.symbol_name == "double_quoted_string"
      curr_token.symbol_name = "open_double_quoted_string"
    end
    create_token("variable", curr_char) && advance
    switch_to(:variable_within_double_string)
  end

  for_character ['"'] do
    if reverse_peek != "\\"
      # Unescaped quote: the string ends here
      switch_to(:php) && advance
    elsif double_reverse_peek == "\\"
      # Preceded by a doubled backslash: the quote is appended and the
      # string ends
      (curr_token.value << curr_char) && advance
      switch_to(:php)
    else
      # Escaped quote: every backslash collected so far is removed from
      # the token value, then the quote is appended
      curr_token.value.gsub!("\\", "")
      (curr_token.value << curr_char) && advance
    end
  end
end
468
+
469
# Body of a single-quoted string: characters are appended verbatim
# (backslashes included) to the current token until the closing quote.
for_state :single_string do
  # Any character may appear in a string. This used to be registered
  # for AllCharacters minus the quote, which left characters such as
  # "~", "`" or non-ASCII text with no handler; the double-quoted state
  # already uses for_default.
  for_default do
    curr_token.value << curr_char and advance
  end

  for_character ["'"] do
    # The quote is escaped only when an ODD number of backslashes sits
    # directly in front of it; an even run means the backslashes escape
    # each other (as in 'C:\\') and the quote closes the string. The
    # previous check looked at a single preceding character and so
    # never terminated such strings. The regexp always matches (possibly
    # the empty string), so the slice is never nil.
    trailing_backslashes = curr_token.value[/\\*\z/].length
    if trailing_backslashes % 2 == 1
      curr_token.value << curr_char and advance
    else
      switch_to :php and advance
    end
  end
end
482
+
483
# Single-line comment ("//" or "#"): everything up to the end of the
# line is discarded without producing tokens.
for_state :comment do
  # Comments can contain arbitrary text. This used to be registered for
  # AllCharacters minus the newline, which left characters such as "~",
  # "`" or non-ASCII text with no handler.
  for_default do
    advance
  end

  # The line break is consumed and tokenizing resumes in :php
  for_characters ["\n", "\r"] do
    advance
    switch_to :php
  end
end
493
+
494
# Block comment ("/* ... */"): everything up to and including the
# closing "*/" is discarded without producing tokens.
for_state :multiline_comment do
  # Comments can contain arbitrary text. This used to be registered for
  # AllCharacters minus "*", which left characters such as "~", "`" or
  # non-ASCII text with no handler.
  for_default do
    advance
  end

  for_characters ['*'] do
    if peek == '/'
      # End of the comment: skip "*/" and resume in :php
      2.times { advance }
      switch_to :php
    else
      # Just a star inside the comment
      advance
    end
  end
end
508
+
509
+ # Wow.. this is one crazy hack. I'll have to re-write this
510
+ # at a later date
511
# Heredoc ("<<<IDENT ... IDENT"), entered just after the "<<<".
# Phase 1 collects the identifier up to the end of the line; phase 2
# appends text to a double_quoted_string token until the identifier is
# seen again. Progress is tracked in @heredoc_identifier and
# @heredoc_identifier_complete.
for_state :heredoc do
  # Heredoc bodies can contain arbitrary text. This used to be
  # registered for AllCharacters only, which left characters such as
  # "~", "`" or non-ASCII text with no handler.
  for_default do
    @heredoc_identifier_complete ||= false
    @heredoc_identifier ||= ""

    if @heredoc_identifier_complete
      # We have the identifier. Append text to the current token
      if exactly_matches? @heredoc_identifier
        @heredoc_identifier.length.times { advance }
        # Reset BOTH pieces of state for the next heredoc. The flag
        # used to be left set to true here, so a second heredoc skipped
        # identifier parsing and, because the identifier was then the
        # empty string (which always matches), ended immediately.
        @heredoc_identifier_complete, @heredoc_identifier = false, ""
        # We've found the end of the heredoc. Go back to PHP
        switch_to :php
      else
        # Keep appending..
        curr_token.value << curr_char and advance
      end
    else
      # We haven't parsed the heredoc identifier yet. Parse it
      if curr_char == "\n" or curr_char == "\r"
        # We've reached the end of the heredoc identifier
        # Mark it and create a token for the string
        @heredoc_identifier_complete = true
        create_token "double_quoted_string", "" and advance
      else
        # Just another character in the identifier
        @heredoc_identifier << curr_char and advance
      end
    end
  end
end
541
+
542
+ end
543
+ end
544
+ end