ruby_parser 3.17.0 → 3.18.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/History.rdoc +76 -0
- data/Manifest.txt +3 -0
- data/README.rdoc +1 -0
- data/Rakefile +68 -18
- data/bin/ruby_parse_extract_error +1 -1
- data/compare/normalize.rb +6 -1
- data/gauntlet.md +106 -0
- data/lib/rp_extensions.rb +15 -36
- data/lib/rp_stringscanner.rb +20 -51
- data/lib/ruby20_parser.rb +3445 -3394
- data/lib/ruby20_parser.y +326 -248
- data/lib/ruby21_parser.rb +3543 -3511
- data/lib/ruby21_parser.y +321 -245
- data/lib/ruby22_parser.rb +3553 -3512
- data/lib/ruby22_parser.y +325 -247
- data/lib/ruby23_parser.rb +3566 -3530
- data/lib/ruby23_parser.y +325 -247
- data/lib/ruby24_parser.rb +3595 -3548
- data/lib/ruby24_parser.y +325 -247
- data/lib/ruby25_parser.rb +3595 -3547
- data/lib/ruby25_parser.y +325 -247
- data/lib/ruby26_parser.rb +3605 -3560
- data/lib/ruby26_parser.y +324 -246
- data/lib/ruby27_parser.rb +4657 -3539
- data/lib/ruby27_parser.y +878 -253
- data/lib/ruby30_parser.rb +5230 -3882
- data/lib/ruby30_parser.y +1069 -321
- data/lib/ruby3_parser.yy +3467 -0
- data/lib/ruby_lexer.rb +261 -609
- data/lib/ruby_lexer.rex +27 -20
- data/lib/ruby_lexer.rex.rb +59 -23
- data/lib/ruby_lexer_strings.rb +638 -0
- data/lib/ruby_parser.yy +910 -263
- data/lib/ruby_parser_extras.rb +289 -114
- data/test/test_ruby_lexer.rb +181 -129
- data/test/test_ruby_parser.rb +1213 -108
- data/tools/munge.rb +34 -6
- data/tools/ripper.rb +15 -10
- data.tar.gz.sig +0 -0
- metadata +11 -12
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 36780d9d3244dd62d13430987076d5e81ae2e536d6d2bfd259f8a612da3d94cc
|
4
|
+
data.tar.gz: bec4b32e7f7a8d9ae8e3202f30230f351a2fedc6e2ac4e984260486dbb7529c6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f28d02d2b14687e365bab3a353348b93a9df993be2d1afd3f2783b5b97ca016a6ca2f834ef61ebb4a4eae3decc38e1351349679f951f901bef09c25f23d44322
|
7
|
+
data.tar.gz: 276ecce4db1f72ed2ce0d276679e65419225a46b885d0050aa7ba6382b45033ccd24b5006a0d382f0aecdbb6c5a5fd93e3e826adeafccc3c47ee051b76772eee
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/History.rdoc
CHANGED
@@ -1,3 +1,79 @@
|
|
1
|
+
=== 3.18.0 / 2021-10-27
|
2
|
+
|
3
|
+
Holy crap... 58 commits! 2.7 and 3.0 are feature complete. Strings
|
4
|
+
& heredocs have been rewritten.
|
5
|
+
|
6
|
+
* 9 major enhancements:
|
7
|
+
|
8
|
+
* !!! Rewrote lexer (and friends) for strings, heredocs, and %*[] constructs.
|
9
|
+
* Massive overhaul on line numbers.
|
10
|
+
* Freeze input! Finally!!! No more modifying the input string for heredocs.
|
11
|
+
* Overhauled RPStringScanner. Removed OLD compatibility methods!
|
12
|
+
* Removed Sexp methods: value, to_sym, add, add_all, node_type, values.
|
13
|
+
* value moved to sexp_processor.
|
14
|
+
* Removed String#grep monkey-patch.
|
15
|
+
* Removed String#lineno monkey-patch.
|
16
|
+
* Removed string_to_pos, charpos, etc hacks for ancient ruby versions.
|
17
|
+
* Removed unread_many... NO! NO EDITING THE INPUT STRING!
|
18
|
+
|
19
|
+
* 31 minor enhancements:
|
20
|
+
|
21
|
+
* 2.7/3.0: many more pattern edge cases
|
22
|
+
* 2.7: Added `mlhs = rhs rescue expr`
|
23
|
+
* 2.7: refactored destructured args (`|(k,v)|`) and unfactored(?!) case_body/args.
|
24
|
+
* 3.0: excessed_comma
|
25
|
+
* 3.0: finished most everything: endless methods, patterns, etc.
|
26
|
+
* 3.0: refactored / added new pattern changes
|
27
|
+
* Added RubyLexer#in_heredoc? (ie, is there old_ss ?)
|
28
|
+
* Added RubyLexer#old_ss and old_lineno and removed much of SSStack(ish).
|
29
|
+
* Added Symbol#end_with? when necessary
|
30
|
+
* Added TALLY and DEBUG options for ss.getch and ss.scan
|
31
|
+
* Added ignore_body_comments to make parser productions more clear.
|
32
|
+
* Added support for no_kwarg (eg `def f(**nil)`).
|
33
|
+
* Added support for no_kwarg in blocks (eg `f { |**nil| }`).
|
34
|
+
* Augmented generated parser files to have frozen_string_literal comments and fixed tests.
|
35
|
+
* Broke out 3.0 parser into its own to ease development.
|
36
|
+
* Bumped dependencies on sexp_processor and oedipus_lex.
|
37
|
+
* Clean generated 3.x files.
|
38
|
+
* Extracted all string scanner methods to their own module.
|
39
|
+
* Fixed some precedence decls.
|
40
|
+
* Implemented most of pattern matching for 2.7+.
|
41
|
+
* Improve lex_state= to report location in verbose debug mode.
|
42
|
+
* Made it easier to debug with a particular version of ruby via rake.
|
43
|
+
* Make sure ripper uses the same version of ruby we specified.
|
44
|
+
* Moved all string/heredoc/etc code to ruby_lexer_strings.rb
|
45
|
+
* Remove warning from newer bisons.
|
46
|
+
* Sprinkled in some frozen_string_literal, but mostly helped by oedipus bump.
|
47
|
+
* Switch to comparing against ruby binary since ripper is buggy.
|
48
|
+
* bugs task should try both bug*.rb and bad*.rb.
|
49
|
+
* endless methods
|
50
|
+
* f_any_kwrest refactoring.
|
51
|
+
* refactored defn/defs
|
52
|
+
|
53
|
+
* 15 bug fixes:
|
54
|
+
|
55
|
+
* Cleaned a bunch of old hacks. Initializing RubyLexer w/ Parser is cleaner now.
|
56
|
+
* Corrected some lex_state errors in process_token_keyword.
|
57
|
+
* Fixed ancient ruby2 change (use #lines) in ruby_parse_extract_error.
|
58
|
+
* Fixed bug where else without rescue only raises on 2.6+
|
59
|
+
* Fixed caller for getch and scan when DEBUG=1
|
60
|
+
* Fixed comments in the middle of message cascades.
|
61
|
+
* Fixed differences w/ symbol productions against ruby 2.7.
|
62
|
+
* Fixed dsym to use string_contents production.
|
63
|
+
* Fixed error in bdot2/3 in some edge cases. Fixed p_alt line.
|
64
|
+
* Fixed heredoc dedenting in the presence of empty lines. (mvz)
|
65
|
+
* Fixed some leading whitespace / comment processing
|
66
|
+
* Fixed up how class/module/defn/defs comments were collected.
|
67
|
+
* Overhauled ripper.rb to deal with buggy ripper w/ yydebug.
|
68
|
+
* Removed dsym from literal.
|
69
|
+
* Removed tUBANG lexeme but kept it distinct as a method name (eg: `def !@`).
|
70
|
+
|
71
|
+
=== 3.17.0 / 2021-08-03
|
72
|
+
|
73
|
+
* 1 minor enhancement:
|
74
|
+
|
75
|
+
* Added support for arg forwarding (eg `def f(...); m(...); end`) (presidentbeef)
|
76
|
+
|
1
77
|
=== 3.16.0 / 2021-05-15
|
2
78
|
|
3
79
|
* 1 major enhancement:
|
data/Manifest.txt
CHANGED
@@ -7,6 +7,7 @@ bin/ruby_parse
|
|
7
7
|
bin/ruby_parse_extract_error
|
8
8
|
compare/normalize.rb
|
9
9
|
debugging.md
|
10
|
+
gauntlet.md
|
10
11
|
lib/.document
|
11
12
|
lib/rp_extensions.rb
|
12
13
|
lib/rp_stringscanner.rb
|
@@ -28,9 +29,11 @@ lib/ruby27_parser.rb
|
|
28
29
|
lib/ruby27_parser.y
|
29
30
|
lib/ruby30_parser.rb
|
30
31
|
lib/ruby30_parser.y
|
32
|
+
lib/ruby3_parser.yy
|
31
33
|
lib/ruby_lexer.rb
|
32
34
|
lib/ruby_lexer.rex
|
33
35
|
lib/ruby_lexer.rex.rb
|
36
|
+
lib/ruby_lexer_strings.rb
|
34
37
|
lib/ruby_parser.rb
|
35
38
|
lib/ruby_parser.yy
|
36
39
|
lib/ruby_parser_extras.rb
|
data/README.rdoc
CHANGED
@@ -32,6 +32,7 @@ Tested against 801,039 files from the latest of all rubygems (as of 2013-05):
|
|
32
32
|
* 1.8 parser is at 99.9739% accuracy, 3.651 sigma
|
33
33
|
* 1.9 parser is at 99.9940% accuracy, 4.013 sigma
|
34
34
|
* 2.0 parser is at 99.9939% accuracy, 4.008 sigma
|
35
|
+
* 2.6 parser is at 99.9972% accuracy, 4.191 sigma
|
35
36
|
|
36
37
|
== FEATURES/PROBLEMS:
|
37
38
|
|
data/Rakefile
CHANGED
@@ -13,17 +13,22 @@ Hoe.add_include_dirs "../../sexp_processor/dev/lib"
|
|
13
13
|
Hoe.add_include_dirs "../../minitest/dev/lib"
|
14
14
|
Hoe.add_include_dirs "../../oedipus_lex/dev/lib"
|
15
15
|
|
16
|
-
V2 = %w[20 21 22 23 24 25 26 27
|
17
|
-
|
16
|
+
V2 = %w[20 21 22 23 24 25 26 27]
|
17
|
+
V3 = %w[30]
|
18
|
+
|
19
|
+
VERS = V2 + V3
|
20
|
+
|
21
|
+
ENV["FAST"] = VERS.last if ENV["FAST"] && !VERS.include?(ENV["FAST"])
|
22
|
+
VERS.replace [ENV["FAST"]] if ENV["FAST"]
|
18
23
|
|
19
24
|
Hoe.spec "ruby_parser" do
|
20
25
|
developer "Ryan Davis", "ryand-ruby@zenspider.com"
|
21
26
|
|
22
27
|
license "MIT"
|
23
28
|
|
24
|
-
dependency "sexp_processor",
|
29
|
+
dependency "sexp_processor", "~> 4.16"
|
25
30
|
dependency "rake", [">= 10", "< 15"], :developer
|
26
|
-
dependency "oedipus_lex", "~> 2.
|
31
|
+
dependency "oedipus_lex", "~> 2.6", :developer
|
27
32
|
|
28
33
|
# NOTE: Ryan!!! Stop trying to fix this dependency! Isolate just
|
29
34
|
# can't handle having a faux-gem half-installed! Stop! Just `gem
|
@@ -35,11 +40,11 @@ Hoe.spec "ruby_parser" do
|
|
35
40
|
require_ruby_version [">= 2.1", "< 4"]
|
36
41
|
|
37
42
|
if plugin? :perforce then # generated files
|
38
|
-
|
43
|
+
VERS.each do |n|
|
39
44
|
self.perforce_ignore << "lib/ruby#{n}_parser.rb"
|
40
45
|
end
|
41
46
|
|
42
|
-
|
47
|
+
VERS.each do |n|
|
43
48
|
self.perforce_ignore << "lib/ruby#{n}_parser.y"
|
44
49
|
end
|
45
50
|
|
@@ -53,6 +58,23 @@ Hoe.spec "ruby_parser" do
|
|
53
58
|
end
|
54
59
|
end
|
55
60
|
|
61
|
+
def maybe_add_to_top path, string
|
62
|
+
file = File.read path
|
63
|
+
|
64
|
+
return if file.start_with? string
|
65
|
+
|
66
|
+
warn "Altering top of #{path}"
|
67
|
+
tmp_path = "#{path}.tmp"
|
68
|
+
File.open(tmp_path, "w") do |f|
|
69
|
+
f.puts string
|
70
|
+
f.puts
|
71
|
+
|
72
|
+
f.write file
|
73
|
+
# TODO: make this deal with encoding comments properly?
|
74
|
+
end
|
75
|
+
File.rename tmp_path, path
|
76
|
+
end
|
77
|
+
|
56
78
|
V2.each do |n|
|
57
79
|
file "lib/ruby#{n}_parser.y" => "lib/ruby_parser.yy" do |t|
|
58
80
|
cmd = 'unifdef -tk -DV=%s -UDEAD %s > %s || true' % [n, t.source, t.name]
|
@@ -62,8 +84,23 @@ V2.each do |n|
|
|
62
84
|
file "lib/ruby#{n}_parser.rb" => "lib/ruby#{n}_parser.y"
|
63
85
|
end
|
64
86
|
|
87
|
+
V3.each do |n|
|
88
|
+
file "lib/ruby#{n}_parser.y" => "lib/ruby3_parser.yy" do |t|
|
89
|
+
cmd = 'unifdef -tk -DV=%s -UDEAD %s > %s || true' % [n, t.source, t.name]
|
90
|
+
sh cmd
|
91
|
+
end
|
92
|
+
|
93
|
+
file "lib/ruby#{n}_parser.rb" => "lib/ruby#{n}_parser.y"
|
94
|
+
end
|
95
|
+
|
65
96
|
file "lib/ruby_lexer.rex.rb" => "lib/ruby_lexer.rex"
|
66
97
|
|
98
|
+
task :parser do |t|
|
99
|
+
t.prerequisite_tasks.grep(Rake::FileTask).select(&:already_invoked).each do |f|
|
100
|
+
maybe_add_to_top f.name, "# frozen_string_literal: true"
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
67
104
|
task :generate => [:lexer, :parser]
|
68
105
|
|
69
106
|
task :clean do
|
@@ -72,6 +109,7 @@ task :clean do
|
|
72
109
|
Dir["coverage.info"] +
|
73
110
|
Dir["coverage"] +
|
74
111
|
Dir["lib/ruby2*_parser.y"] +
|
112
|
+
Dir["lib/ruby3*_parser.y"] +
|
75
113
|
Dir["lib/*.output"])
|
76
114
|
end
|
77
115
|
|
@@ -151,7 +189,7 @@ def ruby_parse version
|
|
151
189
|
sh "expand parse.y > ../#{parse_y}"
|
152
190
|
end
|
153
191
|
|
154
|
-
ruby "-pi", "-e", 'gsub(/^%
|
192
|
+
ruby "-pi", "-e", 'gsub(/^%pure-parser/, "%define api.pure")', "../#{parse_y}"
|
155
193
|
end
|
156
194
|
sh "rm -rf #{ruby_dir}"
|
157
195
|
end
|
@@ -248,7 +286,7 @@ ruby_parse "2.7.4"
|
|
248
286
|
ruby_parse "3.0.2"
|
249
287
|
|
250
288
|
task :debug => :isolate do
|
251
|
-
ENV["V"] ||=
|
289
|
+
ENV["V"] ||= VERS.last
|
252
290
|
Rake.application[:parser].invoke # this way we can have DEBUG set
|
253
291
|
Rake.application[:lexer].invoke # this way we can have DEBUG set
|
254
292
|
|
@@ -263,7 +301,7 @@ task :debug => :isolate do
|
|
263
301
|
time = (ENV["RP_TIMEOUT"] || 10).to_i
|
264
302
|
|
265
303
|
n = ENV["BUG"]
|
266
|
-
file = (n && "bug#{n}.rb") || ENV["F"] || ENV["FILE"] || "
|
304
|
+
file = (n && "bug#{n}.rb") || ENV["F"] || ENV["FILE"] || "debug.rb"
|
267
305
|
ruby = ENV["R"] || ENV["RUBY"]
|
268
306
|
|
269
307
|
if ruby then
|
@@ -286,19 +324,22 @@ task :debug => :isolate do
|
|
286
324
|
end
|
287
325
|
|
288
326
|
task :debug3 do
|
289
|
-
file = ENV["F"] || "
|
290
|
-
|
327
|
+
file = ENV["F"] || "debug.rb"
|
328
|
+
version = ENV["V"] || ""
|
329
|
+
verbose = ENV["VERBOSE"] ? "-v" : ""
|
291
330
|
munge = "./tools/munge.rb #{verbose}"
|
292
331
|
|
293
332
|
abort "Need a file to parse, via: F=path.rb" unless file
|
294
333
|
|
295
334
|
ENV.delete "V"
|
296
335
|
|
297
|
-
|
298
|
-
|
299
|
-
sh "
|
336
|
+
ruby = "ruby#{version}"
|
337
|
+
|
338
|
+
sh "#{ruby} -v"
|
339
|
+
sh "#{ruby} -y #{file} 2>&1 | #{munge} > tmp/ruby"
|
340
|
+
sh "#{ruby} ./tools/ripper.rb -d #{file} | #{munge} > tmp/rip"
|
300
341
|
sh "rake debug F=#{file} DEBUG=1 2>&1 | #{munge} > tmp/rp"
|
301
|
-
sh "diff -U 999 -d tmp/{
|
342
|
+
sh "diff -U 999 -d tmp/{ruby,rp}"
|
302
343
|
end
|
303
344
|
|
304
345
|
task :cmp do
|
@@ -310,16 +351,25 @@ task :cmp3 do
|
|
310
351
|
end
|
311
352
|
|
312
353
|
task :extract => :isolate do
|
313
|
-
ENV["V"] ||=
|
354
|
+
ENV["V"] ||= VERS.last
|
314
355
|
Rake.application[:parser].invoke # this way we can have DEBUG set
|
315
356
|
|
316
|
-
file = ENV["F"] || ENV["FILE"]
|
357
|
+
file = ENV["F"] || ENV["FILE"] || abort("Need to provide F=<path>")
|
317
358
|
|
318
359
|
ruby "-Ilib", "bin/ruby_parse_extract_error", file
|
319
360
|
end
|
320
361
|
|
362
|
+
task :parse => :isolate do
|
363
|
+
ENV["V"] ||= VERS.last
|
364
|
+
Rake.application[:parser].invoke # this way we can have DEBUG set
|
365
|
+
|
366
|
+
file = ENV["F"] || ENV["FILE"] || abort("Need to provide F=<path>")
|
367
|
+
|
368
|
+
ruby "-Ilib", "bin/ruby_parse", file
|
369
|
+
end
|
370
|
+
|
321
371
|
task :bugs do
|
322
|
-
sh "for f in bug*.rb ; do #{Gem.ruby} -S rake debug F=$f && rm $f ; done"
|
372
|
+
sh "for f in bug*.rb bad*.rb ; do #{Gem.ruby} -S rake debug F=$f && rm $f ; done"
|
323
373
|
end
|
324
374
|
|
325
375
|
# vim: syntax=Ruby
|
data/compare/normalize.rb
CHANGED
@@ -84,6 +84,7 @@ def munge s
|
|
84
84
|
|
85
85
|
"' '", "tSPACE", # needs to be later to avoid bad hits
|
86
86
|
|
87
|
+
"%empty", "none", # newer bison
|
87
88
|
"/* empty */", "none",
|
88
89
|
/^\s*$/, "none",
|
89
90
|
|
@@ -140,6 +141,7 @@ def munge s
|
|
140
141
|
'"do for block"', "kDO_BLOCK",
|
141
142
|
'"do for condition"', "kDO_COND",
|
142
143
|
'"do for lambda"', "kDO_LAMBDA",
|
144
|
+
"tLABEL", "kLABEL",
|
143
145
|
|
144
146
|
# UGH
|
145
147
|
"k_LINE__", "k__LINE__",
|
@@ -155,7 +157,10 @@ def munge s
|
|
155
157
|
/\"(\w+) \(?modifier\)?\"/, proc { |x| "k#{$1.upcase}_MOD" },
|
156
158
|
/\"(\w+)\"/, proc { |x| "k#{$1.upcase}" },
|
157
159
|
|
158
|
-
|
160
|
+
/\$?@(\d+)(\s+|$)/, "", # newer bison
|
161
|
+
|
162
|
+
# TODO: remove for 3.0 work:
|
163
|
+
"lex_ctxt ", "" # 3.0 production that's mostly noise right now
|
159
164
|
]
|
160
165
|
|
161
166
|
renames.each_slice(2) do |(a, b)|
|
data/gauntlet.md
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
# Running the Gauntlet
|
2
|
+
|
3
|
+
## Maintaining a Gem Mirror
|
4
|
+
|
5
|
+
I use rubygems-mirror to keep an archive of all the latest rubygems on
|
6
|
+
an external disk. Here is the config:
|
7
|
+
|
8
|
+
```
|
9
|
+
---
|
10
|
+
- from: https://rubygems.org
|
11
|
+
to: /Volumes/StuffA/gauntlet/mirror
|
12
|
+
parallelism: 10
|
13
|
+
retries: 3
|
14
|
+
delete: true
|
15
|
+
skiperror: true
|
16
|
+
hashdir: true
|
17
|
+
```
|
18
|
+
|
19
|
+
And I update using rake:
|
20
|
+
|
21
|
+
```
|
22
|
+
% cd ~/Work/git/rubygems/rubygems-mirror
|
23
|
+
% git down
|
24
|
+
% rake mirror:latest
|
25
|
+
% /Volumes/StuffA/gauntlet/bin/cleanup.rb
|
26
|
+
```
|
27
|
+
|
28
|
+
This rather quickly updates my mirror to the latest versions of
|
29
|
+
everything and then deletes all old versions. I then run a cleanup
|
30
|
+
script that fixes the file dates to their publication date and deletes
|
31
|
+
any gems that have invalid specs. This can argue with the mirror a
|
32
|
+
bit, but it is pretty minimal (currently ~20 bad gems).
|
33
|
+
|
34
|
+
## Curating an Archive of Ruby Files
|
35
|
+
|
36
|
+
Next, I process the gem mirror into a much more digestible structure
|
37
|
+
using `hash.rb` (TODO: needs a better name):
|
38
|
+
|
39
|
+
```
|
40
|
+
% cd RP
|
41
|
+
% /Volumes/StuffA/gauntlet/bin/unpack_gems.rb
|
42
|
+
... waaaait ...
|
43
|
+
% mv hashed.noindex gauntlet.$(today).noindex
|
44
|
+
% lrztar gauntlet.$(today).noindex
|
45
|
+
% mv gauntlet.$(today).noindex.lrz /Volumes/StuffA/gauntlet/
|
46
|
+
```
|
47
|
+
|
48
|
+
This script filters all the newer gems (TODO: WHY?), unpacks them,
|
49
|
+
finds all the files that look like they're valid ruby, ensures they're
|
50
|
+
valid ruby (using the current version of ruby to compile them), and
|
51
|
+
then moves them into a SHA dir structure that looks something like
|
52
|
+
this:
|
53
|
+
|
54
|
+
```
|
55
|
+
hashed.noindex/a/b/c/<full_file_sha>.rb
|
56
|
+
```
|
57
|
+
|
58
|
+
This removes all duplicates and puts everything in a fairly even,
|
59
|
+
wide, flat directory layout.
|
60
|
+
|
61
|
+
This process takes a very long time, even with a lot of
|
62
|
+
parallelization. There are currently about 160k gems in the mirror.
|
63
|
+
Unpacking, validating, SHA'ing everything is disk and CPU intensive.
|
64
|
+
The `.noindex` extension stops Spotlight from indexing the continuous
|
65
|
+
churn of files being unpacked and moved and saves time.
|
66
|
+
|
67
|
+
Finally, I rename and archive it all up (currently using lrztar, but
|
68
|
+
I'm not in love with it).
|
69
|
+
|
70
|
+
### Stats
|
71
|
+
|
72
|
+
```
|
73
|
+
9696 % find gauntlet.$(today).noindex -type f | lc
|
74
|
+
561270
|
75
|
+
3.5G gauntlet.2021-08-06.noindex
|
76
|
+
239M gauntlet.2021-08-06.noindex.tar.lrz
|
77
|
+
```
|
78
|
+
|
79
|
+
So I wind up with a little over half a million unique ruby files to
|
80
|
+
parse. It's about 3.5g but compresses very nicely down to 240m.
|
81
|
+
|
82
|
+
## Running the Gauntlet
|
83
|
+
|
84
|
+
Assuming you're starting from scratch, unpack the archive once:
|
85
|
+
|
86
|
+
```
|
87
|
+
% lrzuntar gauntlet.$(today).noindex.lrz
|
88
|
+
```
|
89
|
+
|
90
|
+
Then, either run a single process (easier to read):
|
91
|
+
|
92
|
+
```
|
93
|
+
% ./gauntlet/bin/gauntlet.rb gauntlet/*.noindex/?
|
94
|
+
```
|
95
|
+
|
96
|
+
Or max out your machine using xargs (note the `-P 16` and choose accordingly):
|
97
|
+
|
98
|
+
```
|
99
|
+
% ls -d gauntlet/*.noindex/?/? | xargs -n 1 -P 16 ./gauntlet/bin/gauntlet.rb
|
100
|
+
```
|
101
|
+
|
102
|
+
In another terminal I usually monitor the progress like so:
|
103
|
+
|
104
|
+
```
|
105
|
+
% while true ; do clear; fd . -t d -t e gauntlet/*.noindex -X rmdir -p 2> /dev/null ; for D in gauntlet/*.noindex/? ; do echo -n "$D: "; fd .rb $D | wc -l ; done ; echo ; sleep 30 ; done
|
106
|
+
```
|
data/lib/rp_extensions.rb
CHANGED
@@ -12,26 +12,24 @@ class Regexp
|
|
12
12
|
end
|
13
13
|
# :startdoc:
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
unless "".respond_to?(:grep) then
|
19
|
-
class String
|
20
|
-
def grep re
|
21
|
-
lines.grep re
|
22
|
-
end
|
15
|
+
class Array
|
16
|
+
def prepend *vals
|
17
|
+
self[0,0] = vals
|
23
18
|
end
|
24
|
-
end
|
19
|
+
end unless [].respond_to?(:prepend)
|
25
20
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
21
|
+
# :stopdoc:
|
22
|
+
class Symbol
|
23
|
+
def end_with? o
|
24
|
+
self.to_s.end_with? o
|
25
|
+
end
|
26
|
+
end unless :woot.respond_to?(:end_with?)
|
27
|
+
# :startdoc:
|
32
28
|
|
33
|
-
|
29
|
+
############################################################
|
30
|
+
# HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
|
34
31
|
|
32
|
+
class String
|
35
33
|
def clean_caller
|
36
34
|
self.sub(File.dirname(__FILE__), "./lib").sub(/:in.*/, "")
|
37
35
|
end if $DEBUG
|
@@ -40,34 +38,15 @@ end
|
|
40
38
|
require "sexp"
|
41
39
|
|
42
40
|
class Sexp
|
43
|
-
attr_writer :paren
|
41
|
+
attr_writer :paren # TODO: retire
|
44
42
|
|
45
43
|
def paren
|
46
44
|
@paren ||= false
|
47
45
|
end
|
48
46
|
|
49
|
-
def value
|
50
|
-
raise "multi item sexp" if size > 2
|
51
|
-
last
|
52
|
-
end
|
53
|
-
|
54
|
-
def to_sym
|
55
|
-
raise "no: #{self.inspect}.to_sym is a bug"
|
56
|
-
self.value.to_sym
|
57
|
-
end
|
58
|
-
|
59
|
-
alias :add :<<
|
60
|
-
|
61
|
-
def add_all x
|
62
|
-
self.concat x.sexp_body
|
63
|
-
end
|
64
|
-
|
65
47
|
def block_pass?
|
66
48
|
any? { |s| Sexp === s && s.sexp_type == :block_pass }
|
67
49
|
end
|
68
|
-
|
69
|
-
alias :node_type :sexp_type
|
70
|
-
alias :values :sexp_body # TODO: retire
|
71
50
|
end
|
72
51
|
|
73
52
|
# END HACK
|
data/lib/rp_stringscanner.rb
CHANGED
@@ -1,64 +1,33 @@
|
|
1
1
|
require "strscan"
|
2
2
|
|
3
3
|
class RPStringScanner < StringScanner
|
4
|
-
|
5
|
-
# alias :old_getch :getch
|
6
|
-
# def getch
|
7
|
-
# warn({:getch => caller[0]}.inspect)
|
8
|
-
# old_getch
|
9
|
-
# end
|
10
|
-
# end
|
11
|
-
|
12
|
-
if "".respond_to? :encoding then
|
13
|
-
if "".respond_to? :byteslice then
|
14
|
-
def string_to_pos
|
15
|
-
string.byteslice(0, pos)
|
16
|
-
end
|
17
|
-
else
|
18
|
-
def string_to_pos
|
19
|
-
string.bytes.first(pos).pack("c*").force_encoding(string.encoding)
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
def charpos
|
24
|
-
string_to_pos.length
|
25
|
-
end
|
26
|
-
else
|
27
|
-
alias :charpos :pos
|
28
|
-
|
29
|
-
def string_to_pos
|
30
|
-
string[0..pos]
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
def unread_many str # TODO: remove this entirely - we should not need it
|
35
|
-
warn({:unread_many => caller[0]}.inspect) if ENV['TALLY']
|
36
|
-
begin
|
37
|
-
string[charpos, 0] = str
|
38
|
-
rescue IndexError
|
39
|
-
# HACK -- this is a bandaid on a dirty rag on an open festering wound
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
if ENV['DEBUG'] then
|
44
|
-
alias :old_getch :getch
|
4
|
+
if ENV["DEBUG"] || ENV["TALLY"] then
|
45
5
|
def getch
|
46
|
-
c =
|
47
|
-
|
6
|
+
c = super
|
7
|
+
where = caller.drop_while { |s| s =~ /(getch|nextc).$/ }.first
|
8
|
+
where = where.split(/:/).first(2).join(":")
|
9
|
+
if ENV["TALLY"] then
|
10
|
+
d getch:where
|
11
|
+
else
|
12
|
+
d getch:[c, where]
|
13
|
+
end
|
48
14
|
c
|
49
15
|
end
|
50
16
|
|
51
|
-
alias :old_scan :scan
|
52
17
|
def scan re
|
53
|
-
s =
|
54
|
-
where = caller
|
55
|
-
|
18
|
+
s = super
|
19
|
+
where = caller.drop_while { |x| x =~ /scan.$/ }.first
|
20
|
+
where = where.split(/:/).first(2).join(":")
|
21
|
+
if ENV["TALLY"] then
|
22
|
+
d scan:[where]
|
23
|
+
else
|
24
|
+
d scan:[s, where] if s
|
25
|
+
end
|
56
26
|
s
|
57
27
|
end
|
58
|
-
end
|
59
28
|
|
60
|
-
|
61
|
-
|
29
|
+
def d o
|
30
|
+
STDERR.puts o.inspect
|
31
|
+
end
|
62
32
|
end
|
63
33
|
end
|
64
|
-
|