farleyknight-ionize 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (98) hide show
  1. data/README.rdoc +59 -0
  2. data/Rakefile +5 -0
  3. data/bin/ionize +47 -0
  4. data/lib/ionize.rb +75 -0
  5. data/lib/ionize/environment.rb +56 -0
  6. data/lib/ionize/environment/application.rb +58 -0
  7. data/lib/ionize/environment/php_array.rb +95 -0
  8. data/lib/ionize/parser.rb +272 -0
  9. data/lib/ionize/tokenizer.rb +544 -0
  10. data/lib/ionize/translate.rb +34 -0
  11. data/lib/ionize/translate/composite_string_statements.rb +79 -0
  12. data/lib/ionize/translate/debug.rb +16 -0
  13. data/lib/ionize/translate/ext.rb +47 -0
  14. data/lib/ionize/translate/function_args.rb +132 -0
  15. data/lib/ionize/translate/if_statements.rb +42 -0
  16. data/lib/ionize/translate/multiple_statements.rb +22 -0
  17. data/lib/ionize/translate/php_to_ruby.rb +40 -0
  18. data/lib/ionize/translate/rails_for_php.rb +191 -0
  19. data/lib/ionize/translate/rewritable.rb +133 -0
  20. data/lib/ionize/translate/rewrites.rb +51 -0
  21. data/lib/ionize/translate/statements.rb +622 -0
  22. data/lib/ionize/translate/switch_case_statements.rb +52 -0
  23. data/lib/ionize/translate/term_statements.rb +76 -0
  24. data/lib/ionize/translate/translator.rb +52 -0
  25. data/lib/ionize/version.rb +9 -0
  26. data/spec/fixtures/array_lookup.php +10 -0
  27. data/spec/fixtures/boolean_operators.php +5 -0
  28. data/spec/fixtures/boolean_operators.rb +6 -0
  29. data/spec/fixtures/class_def.php +34 -0
  30. data/spec/fixtures/class_def.rb +34 -0
  31. data/spec/fixtures/dangling_else.php +8 -0
  32. data/spec/fixtures/dangling_else.rb +12 -0
  33. data/spec/fixtures/drupal_1.php +663 -0
  34. data/spec/fixtures/drupal_2.php +1152 -0
  35. data/spec/fixtures/empty_string.php +12 -0
  36. data/spec/fixtures/for_loop.php +17 -0
  37. data/spec/fixtures/for_loop2.php +13 -0
  38. data/spec/fixtures/for_loop3.php +16 -0
  39. data/spec/fixtures/for_loop3.rb +17 -0
  40. data/spec/fixtures/for_loop4.php +5 -0
  41. data/spec/fixtures/for_loop4.rb +6 -0
  42. data/spec/fixtures/foreach.php +9 -0
  43. data/spec/fixtures/foreach2.php +8 -0
  44. data/spec/fixtures/foreach3.php +7 -0
  45. data/spec/fixtures/foreach3.rb +7 -0
  46. data/spec/fixtures/fun_def.php +9 -0
  47. data/spec/fixtures/fun_def2.php +30 -0
  48. data/spec/fixtures/fun_def2.rb +30 -0
  49. data/spec/fixtures/fun_def3.php +33 -0
  50. data/spec/fixtures/fun_def4.php +43 -0
  51. data/spec/fixtures/fun_def4.rb +37 -0
  52. data/spec/fixtures/fun_def5.php +36 -0
  53. data/spec/fixtures/fun_with_if.php +6 -0
  54. data/spec/fixtures/fun_with_if.rb +6 -0
  55. data/spec/fixtures/fun_with_ifs.php +12 -0
  56. data/spec/fixtures/fun_with_ifs.rb +14 -0
  57. data/spec/fixtures/hello_world.php +6 -0
  58. data/spec/fixtures/heredoc.php +6 -0
  59. data/spec/fixtures/heredoc.rb +5 -0
  60. data/spec/fixtures/if.php +6 -0
  61. data/spec/fixtures/if.rb +7 -0
  62. data/spec/fixtures/if_boolean.php +5 -0
  63. data/spec/fixtures/if_boolean.rb +5 -0
  64. data/spec/fixtures/if_else.php +11 -0
  65. data/spec/fixtures/if_else1.php +17 -0
  66. data/spec/fixtures/if_else2.php +8 -0
  67. data/spec/fixtures/if_else3.php +15 -0
  68. data/spec/fixtures/if_else_nested.php +14 -0
  69. data/spec/fixtures/if_else_nested.rb +15 -0
  70. data/spec/fixtures/if_else_series.php +12 -0
  71. data/spec/fixtures/if_else_series.rb +12 -0
  72. data/spec/fixtures/if_not.php +5 -0
  73. data/spec/fixtures/if_not.rb +5 -0
  74. data/spec/fixtures/if_with_brackets.php +7 -0
  75. data/spec/fixtures/if_with_brackets.rb +7 -0
  76. data/spec/fixtures/long_if_else.php +10 -0
  77. data/spec/fixtures/long_if_else.rb +9 -0
  78. data/spec/fixtures/oo.php +16 -0
  79. data/spec/fixtures/php_nuke/sql_layer.php +527 -0
  80. data/spec/fixtures/postop.php +3 -0
  81. data/spec/fixtures/preop.php +7 -0
  82. data/spec/fixtures/simple_fun_def.php +4 -0
  83. data/spec/fixtures/switch_case.php +13 -0
  84. data/spec/fixtures/switch_case.rb +14 -0
  85. data/spec/fixtures/switch_case2.php +25 -0
  86. data/spec/fixtures/switch_case3.php +40 -0
  87. data/spec/fixtures/switch_case3.rb +42 -0
  88. data/spec/fixtures/switch_case4.php +56 -0
  89. data/spec/fixtures/switch_case5.php +71 -0
  90. data/spec/fixtures/switch_case_with_rescue_nil.php +43 -0
  91. data/spec/fixtures/switch_case_with_rescue_nil.rb +35 -0
  92. data/spec/fixtures/tertiary.php +3 -0
  93. data/spec/helper.rb +17 -0
  94. data/spec/php_environment_spec.rb +83 -0
  95. data/spec/php_parser_spec.rb +121 -0
  96. data/spec/php_translator_spec.rb +358 -0
  97. data/spec/rails_for_php_spec.rb +303 -0
  98. metadata +191 -0
@@ -0,0 +1,544 @@
1
+ require 'rubygems'
2
+ require 'dhaka'
3
+
4
+ module Ionize
5
+ module Php
6
+ class Tokenizer < Dhaka::Tokenizer
7
# Character classes used by the tokenizer states. The lower-case locals
# below are captured by the state blocks defined later in this class body.
digits     = ('0'..'9').to_a
letters    = ('A'..'Z').to_a + ('a'..'z').to_a
whitespace = [' ']
underscore = ['_']
star       = ['*']
newline    = ["\n"]

# Punctuation that the :php state turns into a token named after the
# character itself.
Symbols = ['[', ']', '(', ')', '{', '}', ':', ';', ',', '!', '-']

# Every character the string, comment and heredoc states register an
# action for. Written out as literals instead of one local per character.
AllCharacters = digits + Symbols + whitespace + star + underscore +
                ["'", '"'] + letters + newline +
                ['.', "\t", '=', '$', '+', '-', '@', '|', '>', '<', '%', '&'] +
                ['^', '?', '#', "\r", '/', "\\"]

# Words that are tagged with their own token name by the :word state.
Keywords = %w[
  as and break case class default do echo
  extends else exit for foreach function global
  if include include_once list new or print
  protected private require require_once return
  static switch var while
]

Whitespace = [" ", "\t", "\r", "\n"]

# Characters that may appear inside a word or variable name.
Identifiers = letters + underscore + digits

# Characters that terminate a word or variable name.
NonIdentifiers = Whitespace +
                 ['"', "'", '>', '<', '='] +
                 Symbols +
                 ['^', '+', '-', '|', '.', '/', '&', "\n", "\\", '$']
55
+
56
+ # TODO: Adding these methods might be a sign that I should
57
+ # switch to the Lexeme approach
58
+
59
+ # Just have to figure out Dhaka's own regex engine
60
+
61
# Returns the character one position after the current one, or nil when
# there is no such character in @input.
def peek
  following = @input[@curr_char_index + 1]
  following && following.chr
end
64
+
65
# Returns the character two positions after the current one, or nil when
# there is no such character in @input.
def double_peek
  second_ahead = @input[@curr_char_index + 2]
  second_ahead && second_ahead.chr
end
68
+
69
# Returns the character immediately before the current one, or nil when the
# current character is the first one in @input.
#
# The explicit bounds check matters: a negative index would otherwise wrap
# around and return a character from the END of the input.
def reverse_peek
  index = @curr_char_index - 1
  return nil if index < 0

  previous = @input[index]
  previous && previous.chr
end
72
+
73
# Returns the character two positions before the current one, or nil when
# fewer than two characters precede the current position.
#
# The explicit bounds check matters: a negative index would otherwise wrap
# around and return a character from the END of the input.
def double_reverse_peek
  index = @curr_char_index - 2
  return nil if index < 0

  earlier = @input[index]
  earlier && earlier.chr
end
76
+
77
# Emits a token whose name and value are both +keyword+, then advances the
# read position once per character of the keyword.
def create_keyword(keyword)
  create_token(keyword, keyword)
  keyword.length.times { advance }
end
81
+
82
# Emits a 'cast' token carrying +cast+ as its value, then advances the read
# position once per character of +cast+.
def create_cast(cast)
  create_token('cast', cast)
  cast.length.times { advance }
end
86
+
87
# True when the input, starting at the current position, begins with
# +keyword+ (case-sensitive comparison).
def exactly_matches?(keyword)
  @input[@curr_char_index, keyword.length] == keyword
end
90
+
91
# Initial state: whatever the first character is, open an empty
# html_string token and hand over to the :html state without consuming
# the character.
for_state(:idle_state) do
  for_default { create_token("html_string", "") && switch_to(:html) }
end
96
+
97
# Literal (non-PHP) text: everything is appended to the current token until
# a PHP open tag is found.
for_state :html do
  for_default do
    curr_token.value << curr_char and advance
  end

  # The character list is passed as an array, like every other
  # for_characters call in this class, rather than as a bare String.
  for_characters ["<"] do
    # Accept the upper-case spelling as well, matching what the :php state
    # already does for its own "<" handler. The emitted value stays "<?php".
    if exactly_matches? "<?php" or exactly_matches? "<?PHP"
      create_token "php_start", "<?php" and 5.times { advance }
      switch_to :php
    else
      # An ordinary "<" is part of the surrounding text.
      curr_token.value << curr_char and advance
    end
  end
end
111
+
112
# Main state for PHP source. Each handler either emits a token for the
# operator/punctuation at the current position, skips insignificant
# characters, or opens a token and switches to a more specific state.
for_state :php do
  # Punctuation emitted as a token named after the character. Some of these
  # characters (":", ";", "!", "-") are registered again below; presumably
  # the later registration replaces this one -- confirm against Dhaka.
  for_characters Symbols do
    create_token curr_char, curr_char and advance
  end

  # ":" or the "::" scope operator.
  for_characters [":"] do
    if peek == ":"
      create_token "::", "::" and 2.times { advance }
    else
      create_token curr_char, curr_char and advance
    end
  end

  for_characters [";"] do
    create_token curr_char, curr_char and advance
    # Double semicolons ";;" sometimes show up; swallow the second one.
    advance if curr_char == ";"
  end

  # "//" comment, "/*" comment, or the division operator.
  for_characters ["/"] do
    if peek == '/'
      2.times { advance }
      switch_to :comment
    elsif peek == '*'
      2.times { advance }
      switch_to :multiline_comment
    else
      create_token 'op', '/'
      advance
    end
  end

  # "#!" interpreter line or a "#" comment. The "#" itself is left for the
  # :comment state to consume.
  for_characters ['#'] do
    if peek == '!'
      switch_to :unix_interpreter_directive and 2.times { advance }
    else
      switch_to :comment
    end
  end

  # "?>" closes the PHP section; a lone "?" is emitted as its own token.
  for_characters ['?'] do
    if peek == ">"
      create_token "php_end", "?>" and 2.times { advance }
      switch_to :idle_state
    else
      create_token '?', '?' and advance
    end
  end

  for_characters ['@'] do
    create_token '@', '@' and advance
  end

  # "!==", "!=" or "!".
  for_characters ["!"] do
    if peek == "="
      if double_peek == "="
        create_token "op", "!==" and 3.times { advance }
      else
        create_token "!=", "!=" and 2.times { advance }
      end
    else
      create_token "!", "!" and advance
    end
  end

  # "||" or a single "|".
  for_character ["|"] do
    if peek == "|"
      create_token "||", "||" and 2.times { advance }
    else
      create_token "op", "|" and advance
    end
  end

  # "++", "+=" or "+".
  for_characters ["+"] do
    if peek == "+"
      create_token "++", "++" and 2.times { advance }
    elsif peek == "="
      create_token "op", "+=" and 2.times { advance }
    else
      create_token "+", "+" and advance
    end
  end

  # "--", "->", "-=" or "-".
  for_characters ["-"] do
    if peek == "-"
      create_token "--", "--" and 2.times { advance }
    elsif peek == ">"
      create_token "->", "->" and 2.times { advance }
    elsif peek == "="
      create_token "op", "-=" and 2.times { advance }
    else
      create_token "-", "-" and advance
    end
  end

  for_characters ['%', '*'] do
    create_token('op', curr_char) and advance
  end

  # A nested open tag, "<>", the start of a heredoc ("<<<"), "<=" or "<".
  for_characters ['<'] do
    if exactly_matches? "<?php" or exactly_matches? "<?PHP"
      create_token "php_start", "<?php" and 5.times { advance }
    elsif peek == ">"
      create_token "<>", "<>" and 2.times { advance }
    elsif exactly_matches? "<<<"
      3.times { advance } and switch_to :heredoc
    elsif peek == '='
      create_token "op", "<=" and 2.times { advance }
    else
      create_token "<", "<" and advance
    end
  end

  # ">=" or ">".
  for_characters [">"] do
    if peek == '='
      create_token "op", ">=" and 2.times { advance }
    else
      create_token ">", ">" and advance
    end
  end

  # ".=" or ".".
  for_character ['.'] do
    if peek == "="
      create_token('op', '.=') and 2.times { advance }
    else
      create_token('.', '.') and advance
    end
  end

  # "===", "==", "=>" or "=".
  for_character ['='] do
    if peek == '='
      if double_peek == '='
        create_token('op', '===') and 3.times { advance }
      else
        create_token('op', '==') and 2.times { advance }
      end
    elsif peek == '>'
      create_token('=>', '=>') and 2.times { advance }
    else
      create_token('=', '=') and advance
    end
  end

  # "&&" or "&".
  for_character ['&'] do
    if peek == '&'
      create_token('&&', '&&') and 2.times { advance }
    else
      create_token('&', '&') and advance
    end
  end

  for_character ['^'] do
    create_token('op', '^') and advance
  end

  # Open an empty 'num' token; the digit itself is consumed by :number.
  for_characters digits do
    create_token 'num', ''
    switch_to :number
  end

  # "$" or "$$" starts a variable token; the :word state appends the name.
  for_character ['$'] do
    if peek == '$'
      create_token('variable', '$$') and 2.times { advance }
    else
      create_token('variable', '$') and advance
    end
    switch_to :word
  end

  # Open an empty 'word' token; the character is consumed by :word.
  for_character letters + underscore do
    create_token 'word', ''
    switch_to :word
  end

  for_character ['"'] do
    create_token('double_quoted_string', '') and advance
    switch_to :double_string
  end

  for_character ["'"] do
    create_token('single_quoted_string', "") and advance
    switch_to :single_string
  end

  # Insignificant whitespace.
  for_characters [' ', "\t", "\r"] do
    advance
  end

  # Dhaka seems to be introducing literal "\n" sequences; skip them.
  for_characters ["\\"] do
    if peek == "n"
      2.times { advance }
    else
      # Always consume the backslash. Leaving the position unchanged here
      # would hand the same character to this handler again, so tokenizing
      # could never progress past it.
      advance
    end
  end

  for_characters ["\n"] do
    advance
  end
end
312
+
313
# Accumulates a numeric literal into the 'num' token opened by the :php
# state, then hands control back to :php without consuming the terminator.
for_state :number do
  for_characters digits do
    curr_token.value << curr_char and advance
  end

  # A "." belongs to the literal only when it introduces the fractional
  # part: the next character must be a digit and the literal must not
  # already contain a ".". Otherwise it is left for :php (concatenation).
  for_characters ["."] do
    if digits.include?(peek) && !curr_token.value.include?(".")
      curr_token.value << curr_char and advance
    else
      switch_to :php
    end
  end

  # Any other character ends the number. Previously only Symbols and a
  # space were registered here, so input such as "1+2", "$i<10" or a number
  # directly followed by a newline had no matching action in this state.
  for_default do
    switch_to :php
  end
end
322
+
323
# Type names that the :word state tags as 'cast' tokens.
casts = %w[object int integer bool boolean float double real]
324
+
325
# Skips the remainder of a "#!" interpreter line; no token is produced.
for_state :unix_interpreter_directive do
  # Skip every character of the line. Only letters, Symbols, a space and
  # "/" were registered before, so a line such as "#!/usr/bin/php5" (digit)
  # or one containing "." or "_" had no matching action in this state.
  for_default do
    advance
  end

  # The newline is left for the :php state to consume.
  for_characters ["\n"] do
    switch_to :php
  end
end
334
+
335
# Accumulates identifier characters into the current 'word' or 'variable'
# token and, once the word ends, retags it as a keyword or cast if it is one.
for_state :word do
  for_characters Identifiers do
    curr_token.value << curr_char
    if curr_token.value == "else"
      # Special case: end the token as soon as "else" has been read so that
      #   elseif
      # is tokenized as
      #   else if
      # which is easier on the grammar.
      curr_token.symbol_name = "else"
      switch_to :php and advance
    elsif !Identifiers.include?(peek) &&
          %w(true false).include?(curr_token.value.downcase)
      # The constants true/false (any letter case) get their own token
      # name. The whole word must match and must end here; the earlier
      # unanchored /TRUE/i and /FALSE/i tests also retagged names that
      # merely contain those letters, e.g. "$is_true" or "false_positive".
      curr_token.symbol_name = curr_token.value.downcase
      switch_to :php and advance
    else
      # Otherwise we just advance to the next character of the word.
      advance
    end
  end

  # Any non-identifier character ends the word and is left for :php to
  # handle. Using the default action (instead of the fixed NonIdentifiers
  # list) also covers terminators such as "*", "%", "?" and "@", which
  # previously had no action in this state.
  for_default do
    if match = Keywords.detect {|kw| curr_token.value.downcase == kw }
      curr_token.symbol_name = match
    elsif casts.include?(curr_token.value)
      curr_token.symbol_name = "cast"
    end
    # Note: a former `== "and"` branch was unreachable because "and" is
    # already in Keywords; it has been dropped without changing behavior.
    switch_to :php
  end
end
376
+
377
# Inside "$var[...]" within a double-quoted string: collect everything up
# to and including the closing "]" into the current token.
for_state(:array_lookup_within_double_string) do
  for_default do
    curr_token.value << curr_char
    advance
  end

  # The closing bracket completes the lookup; a fresh string node is opened
  # for whatever text follows inside the string.
  for_characters(["]"]) do
    curr_token.value << curr_char
    advance
    create_token("double_quoted_string_node", "")
    switch_to(:double_string)
  end
end
388
+
389
# Inside "{$...}" within a double-quoted string: collect the expression text
# into the current variable token until the closing brace.
for_state(:braced_variable_within_double_string) do
  for_default do
    curr_token.value << curr_char
    advance
  end

  # The "}" is consumed but not stored; a fresh string node is opened for
  # the text that follows inside the string.
  for_characters(["}"]) do
    create_token("double_quoted_string_node", "") && advance
    switch_to(:double_string)
  end
end
399
+
400
# Inside "$var" within a double-quoted string: collect the variable name
# into the current 'variable' token.
for_state :variable_within_double_string do
  for_characters Identifiers do
    curr_token.value << curr_char and advance
  end

  # "$var[" turns the token into an array lookup.
  for_characters ["["] do
    curr_token.symbol_name = "array_lookup_node"
    curr_token.value << curr_char and advance
    switch_to :array_lookup_within_double_string
  end

  # Any other character ends the variable: it becomes the first character
  # of a new string node. This replaces the fixed `NonIdentifiers - ["["]`
  # list, so characters such as "?", "%", "*" and "@" (which previously had
  # no action here) are handled too. "$" keeps its effective old behavior:
  # it was listed in NonIdentifiers, whose later registration appears to
  # have superseded the earlier `Identifiers + ["$"]` one.
  for_default do
    create_token "double_quoted_string_node", curr_char and advance
    switch_to :double_string
  end

  # A quote closes the string unless it is escaped by a single backslash.
  for_characters ['"'] do
    if reverse_peek == "\\"
      if double_reverse_peek == "\\"
        # The backslash itself was escaped, so this quote is real.
        advance and switch_to :php
      else
        curr_token.value << curr_char and advance
      end
    else
      advance and switch_to :php
    end
  end
end
428
+
429
# Body of a double-quoted string. Plain text is appended to the current
# token; "$" and "{$" start an interpolated variable.
for_state(:double_string) do
  for_default do
    curr_token.value << curr_char
    advance
  end

  # "{$" opens a braced interpolation; a lone "{" is ordinary text.
  for_characters(["{"]) do
    if peek != "$"
      curr_token.value << curr_char
      advance
    else
      # The first interpolation marks the string token as "open".
      curr_token.symbol_name = "open_double_quoted_string" if curr_token.symbol_name == "double_quoted_string"
      create_token("variable", "") && advance
      switch_to(:braced_variable_within_double_string)
    end
  end

  # "$" opens a simple interpolated variable.
  for_characters(["$"]) do
    curr_token.symbol_name = "open_double_quoted_string" if curr_token.symbol_name == "double_quoted_string"
    create_token("variable", curr_char) && advance
    switch_to(:variable_within_double_string)
  end

  # A quote ends the string unless the preceding backslash escapes it.
  for_character(['"']) do
    if reverse_peek != "\\"
      switch_to(:php) && advance
    elsif double_reverse_peek == "\\"
      # Preceded by two backslashes: the quote is appended and the string
      # ends here.
      curr_token.value << curr_char
      advance
      switch_to(:php)
    else
      # Escaped quote: strip the backslashes collected so far and keep the
      # quote as part of the text.
      curr_token.value.gsub!("\\", "")
      curr_token.value << curr_char
      advance
    end
  end
end
468
+
469
# Body of a single-quoted string: everything up to the closing quote is
# appended verbatim to the current token.
for_state :single_string do
  # Accept any character. The earlier `AllCharacters - ["'"]` list left
  # characters such as "~", "`" or non-ASCII text without an action.
  for_default do
    curr_token.value << curr_char and advance
  end

  for_character ["'"] do
    # Count the backslashes directly before this quote. An odd count means
    # the quote is escaped (\'); an even count means the backslashes escape
    # each other (e.g. 'C:\\') and the quote really closes the string.
    backslashes = 0
    index = @curr_char_index - 1
    while index >= 0 && @input[index, 1] == "\\"
      backslashes += 1
      index -= 1
    end

    if backslashes % 2 == 1
      curr_token.value << curr_char and advance
    else
      switch_to :php and advance
    end
  end
end
482
+
483
# Single-line comment ("//" or "#"): skip everything up to the end of the
# line; no token is produced.
for_state :comment do
  # Skip any character. The earlier `AllCharacters - newline` list left
  # characters such as "~", "`" or non-ASCII text without an action, so a
  # comment containing them stopped the tokenizer.
  for_default do
    advance
  end

  # The line break is consumed and ends the comment.
  for_characters ["\n", "\r"] do
    advance
    switch_to :php
  end
end
493
+
494
# Block comment ("/* ... */"): skip everything up to and including the
# closing "*/"; no token is produced.
for_state :multiline_comment do
  # Skip any character. The earlier `AllCharacters - star` list left
  # characters such as "~", "`" or non-ASCII text without an action, so a
  # comment containing them stopped the tokenizer.
  for_default do
    advance
  end

  for_characters ['*'] do
    if peek == '/'
      2.times { advance }
      switch_to :php
    else
      # A "*" that is not followed by "/" is part of the comment.
      advance
    end
  end
end
508
+
509
# Heredoc ("<<<IDENT ... IDENT"). This is a hack and should be rewritten:
# the terminator is recognised anywhere in the text, not only at the start
# of a line.
#
# Phase 1 collects the identifier up to the first line break; phase 2
# appends text to a double_quoted_string token until the identifier is seen
# again. State is kept in @heredoc_identifier and
# @heredoc_identifier_complete on the tokenizer instance.
for_state :heredoc do
  # Handle any character (the earlier AllCharacters list left some
  # characters, e.g. "~" or non-ASCII text, without an action).
  for_default do
    @heredoc_identifier_complete ||= false
    @heredoc_identifier ||= ""

    if @heredoc_identifier_complete
      # We have the identifier. Append text to the current token.
      if exactly_matches? @heredoc_identifier
        @heredoc_identifier.length.times { advance }
        # Reset BOTH pieces of state for the next heredoc. The flag used to
        # be left at true here, which made every later heredoc start in
        # phase 2 with an empty identifier and therefore end immediately.
        @heredoc_identifier_complete, @heredoc_identifier = false, ""
        # We've found the end of the heredoc. Go back to PHP code.
        switch_to :php
      else
        # Keep appending..
        curr_token.value << curr_char and advance
      end
    else
      # We haven't parsed the heredoc identifier yet. Parse it.
      if curr_char == "\n" or curr_char == "\r"
        # We've reached the end of the heredoc identifier.
        # Mark it and create a token for the string.
        @heredoc_identifier_complete = true
        create_token "double_quoted_string", "" and advance
      else
        # Just another character in the identifier.
        @heredoc_identifier << curr_char and advance
      end
    end
  end
end
541
+
542
+ end
543
+ end
544
+ end