ruby_parser 3.17.0 → 3.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. checksums.yaml.gz.sig +0 -0
  3. data/History.rdoc +109 -0
  4. data/Manifest.txt +5 -0
  5. data/README.rdoc +9 -6
  6. data/Rakefile +85 -24
  7. data/bin/ruby_parse_extract_error +1 -1
  8. data/compare/normalize.rb +6 -1
  9. data/gauntlet.md +108 -0
  10. data/lib/rp_extensions.rb +15 -36
  11. data/lib/rp_stringscanner.rb +20 -51
  12. data/lib/ruby20_parser.rb +7430 -3528
  13. data/lib/ruby20_parser.y +328 -257
  14. data/lib/ruby21_parser.rb +7408 -3572
  15. data/lib/ruby21_parser.y +323 -254
  16. data/lib/ruby22_parser.rb +7543 -3601
  17. data/lib/ruby22_parser.y +327 -256
  18. data/lib/ruby23_parser.rb +7549 -3612
  19. data/lib/ruby23_parser.y +327 -256
  20. data/lib/ruby24_parser.rb +7640 -3624
  21. data/lib/ruby24_parser.y +327 -256
  22. data/lib/ruby25_parser.rb +7640 -3623
  23. data/lib/ruby25_parser.y +327 -256
  24. data/lib/ruby26_parser.rb +7649 -3632
  25. data/lib/ruby26_parser.y +326 -255
  26. data/lib/ruby27_parser.rb +10132 -4545
  27. data/lib/ruby27_parser.y +871 -262
  28. data/lib/ruby30_parser.rb +10504 -4655
  29. data/lib/ruby30_parser.y +1065 -333
  30. data/lib/ruby31_parser.rb +13622 -0
  31. data/lib/ruby31_parser.y +3481 -0
  32. data/lib/ruby3_parser.yy +3536 -0
  33. data/lib/ruby_lexer.rb +261 -609
  34. data/lib/ruby_lexer.rex +27 -20
  35. data/lib/ruby_lexer.rex.rb +59 -23
  36. data/lib/ruby_lexer_strings.rb +638 -0
  37. data/lib/ruby_parser.rb +2 -0
  38. data/lib/ruby_parser.yy +903 -272
  39. data/lib/ruby_parser_extras.rb +333 -113
  40. data/test/test_ruby_lexer.rb +181 -129
  41. data/test/test_ruby_parser.rb +1529 -288
  42. data/tools/munge.rb +34 -6
  43. data/tools/ripper.rb +15 -10
  44. data.tar.gz.sig +0 -0
  45. metadata +27 -23
  46. metadata.gz.sig +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ff6be95e278654e341f5279fed2fd7f0c9a96d93b2fd23ba1ff4b181d593be18
4
- data.tar.gz: ab91b782eb2e77cdd855fa68f4699614b6160ebcca623dd8be25719b410b4206
3
+ metadata.gz: 5e8b4b78bfb538253820b4ff34d49ebf7a91f9722c0ecfb63e3dadb3067530cb
4
+ data.tar.gz: e8e6e9cc42513094304d79a10fb4802706b10cb147084683c5dd3315c2ecde9a
5
5
  SHA512:
6
- metadata.gz: a469da9dadd1eeb35a48dbb34548e70feed8ca83b2d27e41c6bf940cf9dd779622fbddcc4b3c50534f46c6de42f1f085754739d76051413866ee6557fe84050d
7
- data.tar.gz: d182a507b167a6c9af4a7a48e748f55066462888b646a38a08ab12c4365a57645c5216baa2d64c99c8d7b6ebcb4e4b22219a9da9a7d0bd945c00fc21104d7343
6
+ metadata.gz: dd72b30995fbc5ad74f04dedc341572d03b73b962ddd2dd3ecd255c8d1babe1b4e190b780588b846f2c8b8942327ac4a50bda0e8401d02037299f9b16965e69b
7
+ data.tar.gz: 5793d0f9bf5e11f56db8eb5b53fd4b603664e72818b4bc43a3bfb61cf3c4a0c61f9ecee3757350a684271ae9e0c434782ef739ef0f075b3897fa338bcd2680c5
checksums.yaml.gz.sig CHANGED
Binary file
data/History.rdoc CHANGED
@@ -1,3 +1,112 @@
1
+ === 3.19.0 / 2022-03-29
2
+
3
+ * 1 major enhancement:
4
+
5
+ * Added tentative 3.1 support.
6
+
7
+ * 7 minor enhancements:
8
+
9
+ * 3.1: bare RHS assoc: { y: } => s(:hash, s(:lit, :y), nil)
10
+ * 3.1: calls w/ unnamed block args (bare &)
11
+ * 3.1: endless defn/defs w/ paren-less calls (aka commands)
12
+ * 3.1: pattern capture to nonlocal vars, eg: ^@a, ^$b, ^@@c
13
+ * 3.1: pattern: ^(expr) => expr
14
+ * Improved steps for adding new versions.
15
+ * Improved steps for running gauntlets.
16
+
17
+ * 2 bug fixes:
18
+
19
+ * Bumped 2.6+ cached versions for rake compare.
20
+ * Skip test_regexp_esc_C_slash on ruby 3.1.0 because of MRI bug.
21
+
22
+ === 3.18.1 / 2021-11-10
23
+
24
+ * 1 minor enhancement:
25
+
26
+ * All parser tests are now explicitly testing line numbers at every level.
27
+
28
+ * 3 bug fixes:
29
+
30
+ * Fixed endless method with noargs. (mitsuru)
31
+ * Fixed line numbers on some yield forms.
32
+ * Handle and clearly report if unifdef is missing.
33
+
34
+ === 3.18.0 / 2021-10-27
35
+
36
+ Holy crap... 58 commits! 2.7 and 3.0 are feature complete. Strings
37
+ & heredocs have been rewritten.
38
+
39
+ * 9 major enhancements:
40
+
41
+ * !!! Rewrote lexer (and friends) for strings, heredocs, and %*[] constructs.
42
+ * Massive overhaul on line numbers.
43
+ * Freeze input! Finally!!! No more modifying the input string for heredocs.
44
+ * Overhauled RPStringScanner. Removed OLD compatibility methods!
45
+ * Removed Sexp methods: value, to_sym, add, add_all, node_type, values.
46
+ * value moved to sexp_processor.
47
+ * Removed String#grep monkey-patch.
48
+ * Removed String#lineno monkey-patch.
49
+ * Removed string_to_pos, charpos, etc hacks for ancient ruby versions.
50
+ * Removed unread_many... NO! NO EDITING THE INPUT STRING!
51
+
52
+ * 31 minor enhancements:
53
+
54
+ * 2.7/3.0: many more pattern edge cases
55
+ * 2.7: Added `mlhs = rhs rescue expr`
56
+ * 2.7: refactored destructured args (`|(k,v)|`) and unfactored(?!) case_body/args.
57
+ * 3.0: excessed_comma
58
+ * 3.0: finished most everything: endless methods, patterns, etc.
59
+ * 3.0: refactored / added new pattern changes
60
+ * Added RubyLexer#in_heredoc? (ie, is there old_ss ?)
61
+ * Added RubyLexer#old_ss and old_lineno and removed much of SSStack(ish).
62
+ * Added Symbol#end_with? when necessary
63
+ * Added TALLY and DEBUG options for ss.getch and ss.scan
64
+ * Added ignore_body_comments to make parser productions more clear.
65
+ * Added support for no_kwarg (eg `def f(**nil)`).
66
+ * Added support for no_kwarg in blocks (eg `f { |**nil| }`).
67
+ * Augmented generated parser files to have frozen_string_literal comments and fixed tests.
68
+ * Broke out 3.0 parser into its own to ease development.
69
+ * Bumped dependencies on sexp_processor and oedipus_lex.
70
+ * Clean generated 3.x files.
71
+ * Extracted all string scanner methods to their own module.
72
+ * Fixed some precedence decls.
73
+ * Implemented most of pattern matching for 2.7+.
74
+ * Improve lex_state= to report location in verbose debug mode.
75
+ * Made it easier to debug with a particular version of ruby via rake.
76
+ * Make sure ripper uses the same version of ruby we specified.
77
+ * Moved all string/heredoc/etc code to ruby_lexer_strings.rb
78
+ * Remove warning from newer bisons.
79
+ * Sprinkled in some frozen_string_literal, but mostly helped by oedipus bump.
80
+ * Switch to comparing against ruby binary since ripper is buggy.
81
+ * bugs task should try both bug*.rb and bad*.rb.
82
+ * endless methods
83
+ * f_any_kwrest refactoring.
84
+ * refactored defn/defs
85
+
86
+ * 15 bug fixes:
87
+
88
+ * Cleaned a bunch of old hacks. Initializing RubyLexer w/ Parser is cleaner now.
89
+ * Corrected some lex_state errors in process_token_keyword.
90
+ * Fixed ancient ruby2 change (use #lines) in ruby_parse_extract_error.
91
+ * Fixed bug where else without rescue only raises on 2.6+
92
+ * Fixed caller for getch and scan when DEBUG=1
93
+ * Fixed comments in the middle of message cascades.
94
+ * Fixed differences w/ symbol productions against ruby 2.7.
95
+ * Fixed dsym to use string_contents production.
96
+ * Fixed error in bdot2/3 in some edge cases. Fixed p_alt line.
97
+ * Fixed heredoc dedenting in the presence of empty lines. (mvz)
98
+ * Fixed some leading whitespace / comment processing
99
+ * Fixed up how class/module/defn/defs comments were collected.
100
+ * Overhauled ripper.rb to deal with buggy ripper w/ yydebug.
101
+ * Removed dsym from literal.
102
+ * Removed tUBANG lexeme but kept it distinct as a method name (eg: `def !@`).
103
+
104
+ === 3.17.0 / 2021-08-03
105
+
106
+ * 1 minor enhancement:
107
+
108
+ * Added support for arg forwarding (eg `def f(...); m(...); end`) (presidentbeef)
109
+
1
110
  === 3.16.0 / 2021-05-15
2
111
 
3
112
  * 1 major enhancement:
data/Manifest.txt CHANGED
@@ -7,6 +7,7 @@ bin/ruby_parse
7
7
  bin/ruby_parse_extract_error
8
8
  compare/normalize.rb
9
9
  debugging.md
10
+ gauntlet.md
10
11
  lib/.document
11
12
  lib/rp_extensions.rb
12
13
  lib/rp_stringscanner.rb
@@ -28,9 +29,13 @@ lib/ruby27_parser.rb
28
29
  lib/ruby27_parser.y
29
30
  lib/ruby30_parser.rb
30
31
  lib/ruby30_parser.y
32
+ lib/ruby31_parser.rb
33
+ lib/ruby31_parser.y
34
+ lib/ruby3_parser.yy
31
35
  lib/ruby_lexer.rb
32
36
  lib/ruby_lexer.rex
33
37
  lib/ruby_lexer.rex.rb
38
+ lib/ruby_lexer_strings.rb
34
39
  lib/ruby_parser.rb
35
40
  lib/ruby_parser.yy
36
41
  lib/ruby_parser_extras.rb
data/README.rdoc CHANGED
@@ -32,6 +32,10 @@ Tested against 801,039 files from the latest of all rubygems (as of 2013-05):
32
32
  * 1.8 parser is at 99.9739% accuracy, 3.651 sigma
33
33
  * 1.9 parser is at 99.9940% accuracy, 4.013 sigma
34
34
  * 2.0 parser is at 99.9939% accuracy, 4.008 sigma
35
+ * 2.6 parser is at 99.9972% accuracy, 4.191 sigma
36
+ * 3.0 parser has a 100% parse rate.
37
+ * Tested against 2,672,412 unique ruby files across 167k gems.
38
+ * As do all the others now, basically.
35
39
 
36
40
  == FEATURES/PROBLEMS:
37
41
 
@@ -61,15 +65,14 @@ You can also use Ruby19Parser, Ruby18Parser, or RubyParser.for_current_ruby:
61
65
 
62
66
  To add a new version:
63
67
 
64
- * New parser should be generated from lib/ruby_parser.yy.
65
- * Extend lib/ruby_parser.yy with new class name.
66
- * Add new version number to V2 in Rakefile for rule creation.
68
+ * New parser should be generated from lib/ruby[3]_parser.yy.
69
+ * Extend lib/ruby[3]_parser.yy with new class name.
70
+ * Add new version number to V2/V3 in Rakefile for rule creation.
71
+ * Add new (full) version to `ruby_parse` section of Rakefile for rake compare
67
72
  * Require generated parser in lib/ruby_parser.rb.
68
73
  * Add empty TestRubyParserShared##Plus module and TestRubyParserV## to test/test_ruby_parser.rb.
69
74
  * Extend Manifest.txt with generated file names.
70
- * Extend sexp_processor's pt_testcase.rb to match version
71
- * add_19tests needs to have the version added
72
- * VER_RE needs to have the regexp expanded
75
+ * Add new version number to sexp_processor's pt_testcase.rb in all_versions
73
76
 
74
77
  Until all of these are done, you won't have a clean test run.
75
78
 
data/Rakefile CHANGED
@@ -13,17 +13,22 @@ Hoe.add_include_dirs "../../sexp_processor/dev/lib"
13
13
  Hoe.add_include_dirs "../../minitest/dev/lib"
14
14
  Hoe.add_include_dirs "../../oedipus_lex/dev/lib"
15
15
 
16
- V2 = %w[20 21 22 23 24 25 26 27 30]
17
- V2.replace [V2.last] if ENV["FAST"] # HACK
16
+ V2 = %w[20 21 22 23 24 25 26 27]
17
+ V3 = %w[30 31]
18
+
19
+ VERS = V2 + V3
20
+
21
+ ENV["FAST"] = VERS.last if ENV["FAST"] && !VERS.include?(ENV["FAST"])
22
+ VERS.replace [ENV["FAST"]] if ENV["FAST"]
18
23
 
19
24
  Hoe.spec "ruby_parser" do
20
25
  developer "Ryan Davis", "ryand-ruby@zenspider.com"
21
26
 
22
27
  license "MIT"
23
28
 
24
- dependency "sexp_processor", ["~> 4.15", ">= 4.15.1"]
29
+ dependency "sexp_processor", "~> 4.16"
25
30
  dependency "rake", [">= 10", "< 15"], :developer
26
- dependency "oedipus_lex", "~> 2.5", :developer
31
+ dependency "oedipus_lex", "~> 2.6", :developer
27
32
 
28
33
  # NOTE: Ryan!!! Stop trying to fix this dependency! Isolate just
29
34
  # can't handle having a faux-gem half-installed! Stop! Just `gem
@@ -35,11 +40,11 @@ Hoe.spec "ruby_parser" do
35
40
  require_ruby_version [">= 2.1", "< 4"]
36
41
 
37
42
  if plugin? :perforce then # generated files
38
- V2.each do |n|
43
+ VERS.each do |n|
39
44
  self.perforce_ignore << "lib/ruby#{n}_parser.rb"
40
45
  end
41
46
 
42
- V2.each do |n|
47
+ VERS.each do |n|
43
48
  self.perforce_ignore << "lib/ruby#{n}_parser.y"
44
49
  end
45
50
 
@@ -53,8 +58,44 @@ Hoe.spec "ruby_parser" do
53
58
  end
54
59
  end
55
60
 
61
+ def maybe_add_to_top path, string
62
+ file = File.read path
63
+
64
+ return if file.start_with? string
65
+
66
+ warn "Altering top of #{path}"
67
+ tmp_path = "#{path}.tmp"
68
+ File.open(tmp_path, "w") do |f|
69
+ f.puts string
70
+ f.puts
71
+
72
+ f.write file
73
+ # TODO: make this deal with encoding comments properly?
74
+ end
75
+ File.rename tmp_path, path
76
+ end
77
+
78
+ def unifdef?
79
+ @unifdef ||= system("which unifdef") or abort <<~EOM
80
+ unifdef not found!
81
+
82
+ Please install 'unifdef' package on your system or `rake generate` on a mac.
83
+ EOM
84
+ end
85
+
56
86
  V2.each do |n|
57
87
  file "lib/ruby#{n}_parser.y" => "lib/ruby_parser.yy" do |t|
88
+ unifdef?
89
+ cmd = 'unifdef -tk -DV=%s -UDEAD %s > %s || true' % [n, t.source, t.name]
90
+ sh cmd
91
+ end
92
+
93
+ file "lib/ruby#{n}_parser.rb" => "lib/ruby#{n}_parser.y"
94
+ end
95
+
96
+ V3.each do |n|
97
+ file "lib/ruby#{n}_parser.y" => "lib/ruby3_parser.yy" do |t|
98
+ unifdef?
58
99
  cmd = 'unifdef -tk -DV=%s -UDEAD %s > %s || true' % [n, t.source, t.name]
59
100
  sh cmd
60
101
  end
@@ -64,6 +105,12 @@ end
64
105
 
65
106
  file "lib/ruby_lexer.rex.rb" => "lib/ruby_lexer.rex"
66
107
 
108
+ task :parser do |t|
109
+ t.prerequisite_tasks.grep(Rake::FileTask).select(&:already_invoked).each do |f|
110
+ maybe_add_to_top f.name, "# frozen_string_literal: true"
111
+ end
112
+ end
113
+
67
114
  task :generate => [:lexer, :parser]
68
115
 
69
116
  task :clean do
@@ -72,6 +119,7 @@ task :clean do
72
119
  Dir["coverage.info"] +
73
120
  Dir["coverage"] +
74
121
  Dir["lib/ruby2*_parser.y"] +
122
+ Dir["lib/ruby3*_parser.y"] +
75
123
  Dir["lib/*.output"])
76
124
  end
77
125
 
@@ -136,8 +184,8 @@ def ruby_parse version
136
184
 
137
185
  file c_parse_y => c_tarball do
138
186
  in_compare do
139
- extract_glob = case version
140
- when /2\.7|3\.0/
187
+ extract_glob = case
188
+ when version > "2.7" then
141
189
  "{id.h,parse.y,tool/{id2token.rb,lib/vpath.rb}}"
142
190
  else
143
191
  "{id.h,parse.y,tool/{id2token.rb,vpath.rb}}"
@@ -151,7 +199,7 @@ def ruby_parse version
151
199
  sh "expand parse.y > ../#{parse_y}"
152
200
  end
153
201
 
154
- ruby "-pi", "-e", 'gsub(/^%define\s+api\.pure/, "%pure-parser")', "../#{parse_y}"
202
+ ruby "-pi", "-e", 'gsub(/^%pure-parser/, "%define api.pure")', "../#{parse_y}"
155
203
  end
156
204
  sh "rm -rf #{ruby_dir}"
157
205
  end
@@ -210,7 +258,7 @@ task :versions do
210
258
  require "net/http" # avoid require issues in threads
211
259
  require "net/https"
212
260
 
213
- versions = %w[ 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 3.0 ]
261
+ versions = VERS.map { |s| s.split(//).join "." }
214
262
 
215
263
  base_url = "https://cache.ruby-lang.org/pub/ruby"
216
264
 
@@ -243,12 +291,13 @@ ruby_parse "2.2.10"
243
291
  ruby_parse "2.3.8"
244
292
  ruby_parse "2.4.10"
245
293
  ruby_parse "2.5.9"
246
- ruby_parse "2.6.8"
247
- ruby_parse "2.7.4"
248
- ruby_parse "3.0.2"
294
+ ruby_parse "2.6.9"
295
+ ruby_parse "2.7.5"
296
+ ruby_parse "3.0.3"
297
+ ruby_parse "3.1.1"
249
298
 
250
299
  task :debug => :isolate do
251
- ENV["V"] ||= V2.last
300
+ ENV["V"] ||= VERS.last
252
301
  Rake.application[:parser].invoke # this way we can have DEBUG set
253
302
  Rake.application[:lexer].invoke # this way we can have DEBUG set
254
303
 
@@ -263,7 +312,7 @@ task :debug => :isolate do
263
312
  time = (ENV["RP_TIMEOUT"] || 10).to_i
264
313
 
265
314
  n = ENV["BUG"]
266
- file = (n && "bug#{n}.rb") || ENV["F"] || ENV["FILE"] || "bug.rb"
315
+ file = (n && "bug#{n}.rb") || ENV["F"] || ENV["FILE"] || "debug.rb"
267
316
  ruby = ENV["R"] || ENV["RUBY"]
268
317
 
269
318
  if ruby then
@@ -286,19 +335,22 @@ task :debug => :isolate do
286
335
  end
287
336
 
288
337
  task :debug3 do
289
- file = ENV["F"] || "bug.rb"
290
- verbose = ENV["V"] ? "-v" : ""
338
+ file = ENV["F"] || "debug.rb"
339
+ version = ENV["V"] || ""
340
+ verbose = ENV["VERBOSE"] ? "-v" : ""
291
341
  munge = "./tools/munge.rb #{verbose}"
292
342
 
293
343
  abort "Need a file to parse, via: F=path.rb" unless file
294
344
 
295
345
  ENV.delete "V"
296
346
 
297
- sh "ruby -v"
298
- sh "ruby -y #{file} 2>&1 | #{munge} > tmp/ruby"
299
- sh "./tools/ripper.rb -d #{file} | #{munge} > tmp/rip"
347
+ ruby = "ruby#{version}"
348
+
349
+ sh "#{ruby} -v"
350
+ sh "#{ruby} -y #{file} 2>&1 | #{munge} > tmp/ruby"
351
+ sh "#{ruby} ./tools/ripper.rb -d #{file} | #{munge} > tmp/rip"
300
352
  sh "rake debug F=#{file} DEBUG=1 2>&1 | #{munge} > tmp/rp"
301
- sh "diff -U 999 -d tmp/{rip,rp}"
353
+ sh "diff -U 999 -d tmp/{ruby,rp}"
302
354
  end
303
355
 
304
356
  task :cmp do
@@ -310,16 +362,25 @@ task :cmp3 do
310
362
  end
311
363
 
312
364
  task :extract => :isolate do
313
- ENV["V"] ||= V2.last
365
+ ENV["V"] ||= VERS.last
314
366
  Rake.application[:parser].invoke # this way we can have DEBUG set
315
367
 
316
- file = ENV["F"] || ENV["FILE"]
368
+ file = ENV["F"] || ENV["FILE"] || abort("Need to provide F=<path>")
317
369
 
318
370
  ruby "-Ilib", "bin/ruby_parse_extract_error", file
319
371
  end
320
372
 
373
+ task :parse => :isolate do
374
+ ENV["V"] ||= VERS.last
375
+ Rake.application[:parser].invoke # this way we can have DEBUG set
376
+
377
+ file = ENV["F"] || ENV["FILE"] || abort("Need to provide F=<path>")
378
+
379
+ ruby "-Ilib", "bin/ruby_parse", file
380
+ end
381
+
321
382
  task :bugs do
322
- sh "for f in bug*.rb ; do #{Gem.ruby} -S rake debug F=$f && rm $f ; done"
383
+ sh "for f in bug*.rb bad*.rb ; do #{Gem.ruby} -S rake debug F=$f && rm $f ; done"
323
384
  end
324
385
 
325
386
  # vim: syntax=Ruby
@@ -21,7 +21,7 @@ class RubyParser
21
21
  src = ss.string
22
22
  pre_error = src[0...ss.pos]
23
23
 
24
- defs = pre_error.grep(/^ *(?:def|it)/)
24
+ defs = pre_error.lines.grep(/^ *(?:def|it)/)
25
25
 
26
26
  raise "can't figure out where the bad code starts" unless defs.last
27
27
 
data/compare/normalize.rb CHANGED
@@ -84,6 +84,7 @@ def munge s
84
84
 
85
85
  "' '", "tSPACE", # needs to be later to avoid bad hits
86
86
 
87
+ "%empty", "none", # newer bison
87
88
  "/* empty */", "none",
88
89
  /^\s*$/, "none",
89
90
 
@@ -140,6 +141,7 @@ def munge s
140
141
  '"do for block"', "kDO_BLOCK",
141
142
  '"do for condition"', "kDO_COND",
142
143
  '"do for lambda"', "kDO_LAMBDA",
144
+ "tLABEL", "kLABEL",
143
145
 
144
146
  # UGH
145
147
  "k_LINE__", "k__LINE__",
@@ -155,7 +157,10 @@ def munge s
155
157
  /\"(\w+) \(?modifier\)?\"/, proc { |x| "k#{$1.upcase}_MOD" },
156
158
  /\"(\w+)\"/, proc { |x| "k#{$1.upcase}" },
157
159
 
158
- /@(\d+)(\s+|$)/, "",
160
+ /\$?@(\d+)(\s+|$)/, "", # newer bison
161
+
162
+ # TODO: remove for 3.0 work:
163
+ "lex_ctxt ", "" # 3.0 production that's mostly noise right now
159
164
  ]
160
165
 
161
166
  renames.each_slice(2) do |(a, b)|
data/gauntlet.md ADDED
@@ -0,0 +1,108 @@
1
+ # Running the Gauntlet
2
+
3
+ ## Maintaining a Gem Mirror
4
+
5
+ I use rubygems-mirror to keep an archive of all the latest rubygems on
6
+ an external disk. Here is the config:
7
+
8
+ ```
9
+ ---
10
+ - from: https://rubygems.org
11
+ to: /Volumes/StuffA/gauntlet/mirror
12
+ parallelism: 10
13
+ retries: 3
14
+ delete: true
15
+ skiperror: true
16
+ hashdir: true
17
+ ```
18
+
19
+ And I update using rake:
20
+
21
+ ```
22
+ % cd GIT/rubygems/rubygems-mirror
23
+ % git down
24
+ % rake mirror:latest
25
+ % /Volumes/StuffA/gauntlet/bin/cleanup.rb -y -v
26
+ ```
27
+
28
+ This rather quickly updates my mirror to the latest versions of
29
+ everything and then deletes all old versions. I then run a cleanup
30
+ script that fixes the file dates to their publication date and deletes
31
+ any gems that have invalid specs. This can argue with the mirror a
32
+ bit, but it is pretty minimal (currently ~20 bad gems).
33
+
34
+ ## Curating an Archive of Ruby Files
35
+
36
+ Next, I process the gem mirror into a much more digestible structure
37
+ using `unpack_gems.rb`.
38
+
39
+ ```
40
+ % cd RP/gauntlet
41
+ % time caffeinate /Volumes/StuffA/gauntlet/bin/unpack_gems.rb -v [-a] ; say done
42
+ ... waaaait ...
43
+ % DIR=gauntlet.$(today).(all|new).noindex
44
+ % mv hashed.noindex $DIR
45
+ % tar c $DIR | zstd -5 -T0 --long > archives/$DIR.tar.zst
46
+ % tar vc -T <(fd . $DIR | sort) | zstd -5 -T0 --long > archives/$DIR.tar.zst
47
+ % ./bin/sync.sh
48
+ ```
49
+
50
+ This script filters all the newer (< 1 year old) gems (unless `-a` is
51
+ used), unpacks them, finds all the files that look like they're valid
52
+ ruby, ensures they're valid ruby (using the current version of ruby to
53
+ compile them), and then moves them into a SHA dir structure that looks
54
+ something like this:
55
+
56
+ ```
57
+ hashed.noindex/a/b/c/<full_file_sha>.rb
58
+ ```
59
+
60
+ This removes all duplicates and puts everything in a fairly even,
61
+ wide, flat directory layout.
62
+
63
+ This process takes a very long time, even with a lot of
64
+ parallelization. There are currently about 160k gems in the mirror.
65
+ Unpacking, validating, SHA'ing everything is disk and CPU intensive.
66
+ The `.noindex` extension stops Spotlight from indexing the continuous
67
+ churn of files being unpacked and moved and saves time.
68
+
69
+ Finally, I rename and archive it all up (currently using lrztar, but
70
+ I'm not in love with it).
71
+
72
+ ### Stats
73
+
74
+ ```
75
+ 9696 % find gauntlet.$(today).noindex -type f | lc
76
+ 561270
77
+ 3.5G gauntlet.2021-08-06.noindex
78
+ 239M gauntlet.2021-08-06.noindex.tar.lrz
79
+ ```
80
+
81
+ So I wind up with a little over half a million unique ruby files to
82
+ parse. It's about 3.5g but compresses very nicely down to 240m.
83
+
84
+ ## Running the Gauntlet
85
+
86
+ Assuming you're starting from scratch, unpack the archive once:
87
+
88
+ ```
89
+ % lrzuntar gauntlet.$(today).noindex.lrz
90
+ ```
91
+
92
+ Then, either run a single process (easier to read):
93
+
94
+ ```
95
+ % ./gauntlet/bin/gauntlet.rb gauntlet/*.noindex/?
96
+ ```
97
+
98
+ Or max out your machine using xargs (note the `-P 16` and choose accordingly):
99
+
100
+ ```
101
+ % ls -d gauntlet/*.noindex/?/? | xargs -n 1 -P 16 ./gauntlet/bin/gauntlet.rb
102
+ ```
103
+
104
+ In another terminal I usually monitor the progress like so:
105
+
106
+ ```
107
+ % while true ; do clear; fd . -t d -t e gauntlet/*.noindex -X rmdir -p 2> /dev/null ; for D in gauntlet/*.noindex/? ; do echo -n "$D: "; fd .rb $D | wc -l ; done ; echo ; sleep 30 ; done
108
+ ```
data/lib/rp_extensions.rb CHANGED
@@ -12,26 +12,24 @@ class Regexp
12
12
  end
13
13
  # :startdoc:
14
14
 
15
- ############################################################
16
- # HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
17
-
18
- unless "".respond_to?(:grep) then
19
- class String
20
- def grep re
21
- lines.grep re
22
- end
15
+ class Array
16
+ def prepend *vals
17
+ self[0,0] = vals
23
18
  end
24
- end
19
+ end unless [].respond_to?(:prepend)
25
20
 
26
- class String
27
- ##
28
- # This is a hack used by the lexer to sneak in line numbers at the
29
- # identifier level. This should be MUCH smaller than making
30
- # process_token return [value, lineno] and modifying EVERYTHING that
31
- # reduces tIDENTIFIER.
21
+ # :stopdoc:
22
+ class Symbol
23
+ def end_with? o
24
+ self.to_s.end_with? o
25
+ end
26
+ end unless :woot.respond_to?(:end_with?)
27
+ # :startdoc:
32
28
 
33
- attr_accessor :lineno
29
+ ############################################################
30
+ # HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
34
31
 
32
+ class String
35
33
  def clean_caller
36
34
  self.sub(File.dirname(__FILE__), "./lib").sub(/:in.*/, "")
37
35
  end if $DEBUG
@@ -40,34 +38,15 @@ end
40
38
  require "sexp"
41
39
 
42
40
  class Sexp
43
- attr_writer :paren
41
+ attr_writer :paren # TODO: retire
44
42
 
45
43
  def paren
46
44
  @paren ||= false
47
45
  end
48
46
 
49
- def value
50
- raise "multi item sexp" if size > 2
51
- last
52
- end
53
-
54
- def to_sym
55
- raise "no: #{self.inspect}.to_sym is a bug"
56
- self.value.to_sym
57
- end
58
-
59
- alias :add :<<
60
-
61
- def add_all x
62
- self.concat x.sexp_body
63
- end
64
-
65
47
  def block_pass?
66
48
  any? { |s| Sexp === s && s.sexp_type == :block_pass }
67
49
  end
68
-
69
- alias :node_type :sexp_type
70
- alias :values :sexp_body # TODO: retire
71
50
  end
72
51
 
73
52
  # END HACK