ruby_parser 3.17.0 → 3.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/History.rdoc +76 -0
- data/Manifest.txt +3 -0
- data/README.rdoc +1 -0
- data/Rakefile +68 -18
- data/bin/ruby_parse_extract_error +1 -1
- data/compare/normalize.rb +6 -1
- data/gauntlet.md +106 -0
- data/lib/rp_extensions.rb +15 -36
- data/lib/rp_stringscanner.rb +20 -51
- data/lib/ruby20_parser.rb +3445 -3394
- data/lib/ruby20_parser.y +326 -248
- data/lib/ruby21_parser.rb +3543 -3511
- data/lib/ruby21_parser.y +321 -245
- data/lib/ruby22_parser.rb +3553 -3512
- data/lib/ruby22_parser.y +325 -247
- data/lib/ruby23_parser.rb +3566 -3530
- data/lib/ruby23_parser.y +325 -247
- data/lib/ruby24_parser.rb +3595 -3548
- data/lib/ruby24_parser.y +325 -247
- data/lib/ruby25_parser.rb +3595 -3547
- data/lib/ruby25_parser.y +325 -247
- data/lib/ruby26_parser.rb +3605 -3560
- data/lib/ruby26_parser.y +324 -246
- data/lib/ruby27_parser.rb +4657 -3539
- data/lib/ruby27_parser.y +878 -253
- data/lib/ruby30_parser.rb +5230 -3882
- data/lib/ruby30_parser.y +1069 -321
- data/lib/ruby3_parser.yy +3467 -0
- data/lib/ruby_lexer.rb +261 -609
- data/lib/ruby_lexer.rex +27 -20
- data/lib/ruby_lexer.rex.rb +59 -23
- data/lib/ruby_lexer_strings.rb +638 -0
- data/lib/ruby_parser.yy +910 -263
- data/lib/ruby_parser_extras.rb +289 -114
- data/test/test_ruby_lexer.rb +181 -129
- data/test/test_ruby_parser.rb +1213 -108
- data/tools/munge.rb +34 -6
- data/tools/ripper.rb +15 -10
- data.tar.gz.sig +0 -0
- metadata +11 -12
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 36780d9d3244dd62d13430987076d5e81ae2e536d6d2bfd259f8a612da3d94cc
|
4
|
+
data.tar.gz: bec4b32e7f7a8d9ae8e3202f30230f351a2fedc6e2ac4e984260486dbb7529c6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f28d02d2b14687e365bab3a353348b93a9df993be2d1afd3f2783b5b97ca016a6ca2f834ef61ebb4a4eae3decc38e1351349679f951f901bef09c25f23d44322
|
7
|
+
data.tar.gz: 276ecce4db1f72ed2ce0d276679e65419225a46b885d0050aa7ba6382b45033ccd24b5006a0d382f0aecdbb6c5a5fd93e3e826adeafccc3c47ee051b76772eee
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/History.rdoc
CHANGED
@@ -1,3 +1,79 @@
|
|
1
|
+
=== 3.18.0 / 2021-10-27
|
2
|
+
|
3
|
+
Holy crap... 58 commits! 2.7 and 3.0 are feature complete. Strings
|
4
|
+
& heredocs have been rewritten.
|
5
|
+
|
6
|
+
* 9 major enhancements:
|
7
|
+
|
8
|
+
* !!! Rewrote lexer (and friends) for strings, heredocs, and %*[] constructs.
|
9
|
+
* Massive overhaul on line numbers.
|
10
|
+
* Freeze input! Finally!!! No more modifying the input string for heredocs.
|
11
|
+
* Overhauled RPStringScanner. Removed OLD compatibility methods!
|
12
|
+
* Removed Sexp methods: value, to_sym, add, add_all, node_type, values.
|
13
|
+
* value moved to sexp_processor.
|
14
|
+
* Removed String#grep monkey-patch.
|
15
|
+
* Removed String#lineno monkey-patch.
|
16
|
+
* Removed string_to_pos, charpos, etc hacks for ancient ruby versions.
|
17
|
+
* Removed unread_many... NO! NO EDITING THE INPUT STRING!
|
18
|
+
|
19
|
+
* 31 minor enhancements:
|
20
|
+
|
21
|
+
* 2.7/3.0: many more pattern edge cases
|
22
|
+
* 2.7: Added `mlhs = rhs rescue expr`
|
23
|
+
* 2.7: refactored destructured args (`|(k,v)|`) and unfactored(?!) case_body/args.
|
24
|
+
* 3.0: excessed_comma
|
25
|
+
* 3.0: finished most everything: endless methods, patterns, etc.
|
26
|
+
* 3.0: refactored / added new pattern changes
|
27
|
+
* Added RubyLexer#in_heredoc? (ie, is there old_ss ?)
|
28
|
+
* Added RubyLexer#old_ss and old_lineno and removed much of SSStack(ish).
|
29
|
+
* Added Symbol#end_with? when necessary
|
30
|
+
* Added TALLY and DEBUG options for ss.getch and ss.scan
|
31
|
+
* Added ignore_body_comments to make parser productions more clear.
|
32
|
+
* Added support for no_kwarg (eg `def f(**nil)`).
|
33
|
+
* Added support for no_kwarg in blocks (eg `f { |**nil| }`).
|
34
|
+
* Augmented generated parser files to have frozen_string_literal comments and fixed tests.
|
35
|
+
* Broke out 3.0 parser into its own to ease development.
|
36
|
+
* Bumped dependencies on sexp_processor and oedipus_lex.
|
37
|
+
* Clean generated 3.x files.
|
38
|
+
* Extracted all string scanner methods to their own module.
|
39
|
+
* Fixed some precedence decls.
|
40
|
+
* Implemented most of pattern matching for 2.7+.
|
41
|
+
* Improve lex_state= to report location in verbose debug mode.
|
42
|
+
* Made it easier to debug with a particular version of ruby via rake.
|
43
|
+
* Make sure ripper uses the same version of ruby we specified.
|
44
|
+
* Moved all string/heredoc/etc code to ruby_lexer_strings.rb
|
45
|
+
* Remove warning from newer bisons.
|
46
|
+
* Sprinkled in some frozen_string_literal, but mostly helped by oedipus bump.
|
47
|
+
* Switch to comparing against ruby binary since ripper is buggy.
|
48
|
+
* bugs task should try both bug*.rb and bad*.rb.
|
49
|
+
* endless methods
|
50
|
+
* f_any_kwrest refactoring.
|
51
|
+
* refactored defn/defs
|
52
|
+
|
53
|
+
* 15 bug fixes:
|
54
|
+
|
55
|
+
* Cleaned a bunch of old hacks. Initializing RubyLexer w/ Parser is cleaner now.
|
56
|
+
* Corrected some lex_state errors in process_token_keyword.
|
57
|
+
* Fixed ancient ruby2 change (use #lines) in ruby_parse_extract_error.
|
58
|
+
* Fixed bug where else without rescue only raises on 2.6+
|
59
|
+
* Fixed caller for getch and scan when DEBUG=1
|
60
|
+
* Fixed comments in the middle of message cascades.
|
61
|
+
* Fixed differences w/ symbol productions against ruby 2.7.
|
62
|
+
* Fixed dsym to use string_contents production.
|
63
|
+
* Fixed error in bdot2/3 in some edge cases. Fixed p_alt line.
|
64
|
+
* Fixed heredoc dedenting in the presence of empty lines. (mvz)
|
65
|
+
* Fixed some leading whitespace / comment processing
|
66
|
+
* Fixed up how class/module/defn/defs comments were collected.
|
67
|
+
* Overhauled ripper.rb to deal with buggy ripper w/ yydebug.
|
68
|
+
* Removed dsym from literal.
|
69
|
+
* Removed tUBANG lexeme but kept it distinct as a method name (eg: `def !@`).
|
70
|
+
|
71
|
+
=== 3.17.0 / 2021-08-03
|
72
|
+
|
73
|
+
* 1 minor enhancement:
|
74
|
+
|
75
|
+
* Added support for arg forwarding (eg `def f(...); m(...); end`) (presidentbeef)
|
76
|
+
|
1
77
|
=== 3.16.0 / 2021-05-15
|
2
78
|
|
3
79
|
* 1 major enhancement:
|
data/Manifest.txt
CHANGED
@@ -7,6 +7,7 @@ bin/ruby_parse
|
|
7
7
|
bin/ruby_parse_extract_error
|
8
8
|
compare/normalize.rb
|
9
9
|
debugging.md
|
10
|
+
gauntlet.md
|
10
11
|
lib/.document
|
11
12
|
lib/rp_extensions.rb
|
12
13
|
lib/rp_stringscanner.rb
|
@@ -28,9 +29,11 @@ lib/ruby27_parser.rb
|
|
28
29
|
lib/ruby27_parser.y
|
29
30
|
lib/ruby30_parser.rb
|
30
31
|
lib/ruby30_parser.y
|
32
|
+
lib/ruby3_parser.yy
|
31
33
|
lib/ruby_lexer.rb
|
32
34
|
lib/ruby_lexer.rex
|
33
35
|
lib/ruby_lexer.rex.rb
|
36
|
+
lib/ruby_lexer_strings.rb
|
34
37
|
lib/ruby_parser.rb
|
35
38
|
lib/ruby_parser.yy
|
36
39
|
lib/ruby_parser_extras.rb
|
data/README.rdoc
CHANGED
@@ -32,6 +32,7 @@ Tested against 801,039 files from the latest of all rubygems (as of 2013-05):
|
|
32
32
|
* 1.8 parser is at 99.9739% accuracy, 3.651 sigma
|
33
33
|
* 1.9 parser is at 99.9940% accuracy, 4.013 sigma
|
34
34
|
* 2.0 parser is at 99.9939% accuracy, 4.008 sigma
|
35
|
+
* 2.6 parser is at 99.9972% accuracy, 4.191 sigma
|
35
36
|
|
36
37
|
== FEATURES/PROBLEMS:
|
37
38
|
|
data/Rakefile
CHANGED
@@ -13,17 +13,22 @@ Hoe.add_include_dirs "../../sexp_processor/dev/lib"
|
|
13
13
|
Hoe.add_include_dirs "../../minitest/dev/lib"
|
14
14
|
Hoe.add_include_dirs "../../oedipus_lex/dev/lib"
|
15
15
|
|
16
|
-
V2 = %w[20 21 22 23 24 25 26 27
|
17
|
-
|
16
|
+
V2 = %w[20 21 22 23 24 25 26 27]
|
17
|
+
V3 = %w[30]
|
18
|
+
|
19
|
+
VERS = V2 + V3
|
20
|
+
|
21
|
+
ENV["FAST"] = VERS.last if ENV["FAST"] && !VERS.include?(ENV["FAST"])
|
22
|
+
VERS.replace [ENV["FAST"]] if ENV["FAST"]
|
18
23
|
|
19
24
|
Hoe.spec "ruby_parser" do
|
20
25
|
developer "Ryan Davis", "ryand-ruby@zenspider.com"
|
21
26
|
|
22
27
|
license "MIT"
|
23
28
|
|
24
|
-
dependency "sexp_processor",
|
29
|
+
dependency "sexp_processor", "~> 4.16"
|
25
30
|
dependency "rake", [">= 10", "< 15"], :developer
|
26
|
-
dependency "oedipus_lex", "~> 2.
|
31
|
+
dependency "oedipus_lex", "~> 2.6", :developer
|
27
32
|
|
28
33
|
# NOTE: Ryan!!! Stop trying to fix this dependency! Isolate just
|
29
34
|
# can't handle having a faux-gem half-installed! Stop! Just `gem
|
@@ -35,11 +40,11 @@ Hoe.spec "ruby_parser" do
|
|
35
40
|
require_ruby_version [">= 2.1", "< 4"]
|
36
41
|
|
37
42
|
if plugin? :perforce then # generated files
|
38
|
-
|
43
|
+
VERS.each do |n|
|
39
44
|
self.perforce_ignore << "lib/ruby#{n}_parser.rb"
|
40
45
|
end
|
41
46
|
|
42
|
-
|
47
|
+
VERS.each do |n|
|
43
48
|
self.perforce_ignore << "lib/ruby#{n}_parser.y"
|
44
49
|
end
|
45
50
|
|
@@ -53,6 +58,23 @@ Hoe.spec "ruby_parser" do
|
|
53
58
|
end
|
54
59
|
end
|
55
60
|
|
61
|
+
def maybe_add_to_top path, string
|
62
|
+
file = File.read path
|
63
|
+
|
64
|
+
return if file.start_with? string
|
65
|
+
|
66
|
+
warn "Altering top of #{path}"
|
67
|
+
tmp_path = "#{path}.tmp"
|
68
|
+
File.open(tmp_path, "w") do |f|
|
69
|
+
f.puts string
|
70
|
+
f.puts
|
71
|
+
|
72
|
+
f.write file
|
73
|
+
# TODO: make this deal with encoding comments properly?
|
74
|
+
end
|
75
|
+
File.rename tmp_path, path
|
76
|
+
end
|
77
|
+
|
56
78
|
V2.each do |n|
|
57
79
|
file "lib/ruby#{n}_parser.y" => "lib/ruby_parser.yy" do |t|
|
58
80
|
cmd = 'unifdef -tk -DV=%s -UDEAD %s > %s || true' % [n, t.source, t.name]
|
@@ -62,8 +84,23 @@ V2.each do |n|
|
|
62
84
|
file "lib/ruby#{n}_parser.rb" => "lib/ruby#{n}_parser.y"
|
63
85
|
end
|
64
86
|
|
87
|
+
V3.each do |n|
|
88
|
+
file "lib/ruby#{n}_parser.y" => "lib/ruby3_parser.yy" do |t|
|
89
|
+
cmd = 'unifdef -tk -DV=%s -UDEAD %s > %s || true' % [n, t.source, t.name]
|
90
|
+
sh cmd
|
91
|
+
end
|
92
|
+
|
93
|
+
file "lib/ruby#{n}_parser.rb" => "lib/ruby#{n}_parser.y"
|
94
|
+
end
|
95
|
+
|
65
96
|
file "lib/ruby_lexer.rex.rb" => "lib/ruby_lexer.rex"
|
66
97
|
|
98
|
+
task :parser do |t|
|
99
|
+
t.prerequisite_tasks.grep(Rake::FileTask).select(&:already_invoked).each do |f|
|
100
|
+
maybe_add_to_top f.name, "# frozen_string_literal: true"
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
67
104
|
task :generate => [:lexer, :parser]
|
68
105
|
|
69
106
|
task :clean do
|
@@ -72,6 +109,7 @@ task :clean do
|
|
72
109
|
Dir["coverage.info"] +
|
73
110
|
Dir["coverage"] +
|
74
111
|
Dir["lib/ruby2*_parser.y"] +
|
112
|
+
Dir["lib/ruby3*_parser.y"] +
|
75
113
|
Dir["lib/*.output"])
|
76
114
|
end
|
77
115
|
|
@@ -151,7 +189,7 @@ def ruby_parse version
|
|
151
189
|
sh "expand parse.y > ../#{parse_y}"
|
152
190
|
end
|
153
191
|
|
154
|
-
ruby "-pi", "-e", 'gsub(/^%
|
192
|
+
ruby "-pi", "-e", 'gsub(/^%pure-parser/, "%define api.pure")', "../#{parse_y}"
|
155
193
|
end
|
156
194
|
sh "rm -rf #{ruby_dir}"
|
157
195
|
end
|
@@ -248,7 +286,7 @@ ruby_parse "2.7.4"
|
|
248
286
|
ruby_parse "3.0.2"
|
249
287
|
|
250
288
|
task :debug => :isolate do
|
251
|
-
ENV["V"] ||=
|
289
|
+
ENV["V"] ||= VERS.last
|
252
290
|
Rake.application[:parser].invoke # this way we can have DEBUG set
|
253
291
|
Rake.application[:lexer].invoke # this way we can have DEBUG set
|
254
292
|
|
@@ -263,7 +301,7 @@ task :debug => :isolate do
|
|
263
301
|
time = (ENV["RP_TIMEOUT"] || 10).to_i
|
264
302
|
|
265
303
|
n = ENV["BUG"]
|
266
|
-
file = (n && "bug#{n}.rb") || ENV["F"] || ENV["FILE"] || "
|
304
|
+
file = (n && "bug#{n}.rb") || ENV["F"] || ENV["FILE"] || "debug.rb"
|
267
305
|
ruby = ENV["R"] || ENV["RUBY"]
|
268
306
|
|
269
307
|
if ruby then
|
@@ -286,19 +324,22 @@ task :debug => :isolate do
|
|
286
324
|
end
|
287
325
|
|
288
326
|
task :debug3 do
|
289
|
-
file = ENV["F"] || "
|
290
|
-
|
327
|
+
file = ENV["F"] || "debug.rb"
|
328
|
+
version = ENV["V"] || ""
|
329
|
+
verbose = ENV["VERBOSE"] ? "-v" : ""
|
291
330
|
munge = "./tools/munge.rb #{verbose}"
|
292
331
|
|
293
332
|
abort "Need a file to parse, via: F=path.rb" unless file
|
294
333
|
|
295
334
|
ENV.delete "V"
|
296
335
|
|
297
|
-
|
298
|
-
|
299
|
-
sh "
|
336
|
+
ruby = "ruby#{version}"
|
337
|
+
|
338
|
+
sh "#{ruby} -v"
|
339
|
+
sh "#{ruby} -y #{file} 2>&1 | #{munge} > tmp/ruby"
|
340
|
+
sh "#{ruby} ./tools/ripper.rb -d #{file} | #{munge} > tmp/rip"
|
300
341
|
sh "rake debug F=#{file} DEBUG=1 2>&1 | #{munge} > tmp/rp"
|
301
|
-
sh "diff -U 999 -d tmp/{
|
342
|
+
sh "diff -U 999 -d tmp/{ruby,rp}"
|
302
343
|
end
|
303
344
|
|
304
345
|
task :cmp do
|
@@ -310,16 +351,25 @@ task :cmp3 do
|
|
310
351
|
end
|
311
352
|
|
312
353
|
task :extract => :isolate do
|
313
|
-
ENV["V"] ||=
|
354
|
+
ENV["V"] ||= VERS.last
|
314
355
|
Rake.application[:parser].invoke # this way we can have DEBUG set
|
315
356
|
|
316
|
-
file = ENV["F"] || ENV["FILE"]
|
357
|
+
file = ENV["F"] || ENV["FILE"] || abort("Need to provide F=<path>")
|
317
358
|
|
318
359
|
ruby "-Ilib", "bin/ruby_parse_extract_error", file
|
319
360
|
end
|
320
361
|
|
362
|
+
task :parse => :isolate do
|
363
|
+
ENV["V"] ||= VERS.last
|
364
|
+
Rake.application[:parser].invoke # this way we can have DEBUG set
|
365
|
+
|
366
|
+
file = ENV["F"] || ENV["FILE"] || abort("Need to provide F=<path>")
|
367
|
+
|
368
|
+
ruby "-Ilib", "bin/ruby_parse", file
|
369
|
+
end
|
370
|
+
|
321
371
|
task :bugs do
|
322
|
-
sh "for f in bug*.rb ; do #{Gem.ruby} -S rake debug F=$f && rm $f ; done"
|
372
|
+
sh "for f in bug*.rb bad*.rb ; do #{Gem.ruby} -S rake debug F=$f && rm $f ; done"
|
323
373
|
end
|
324
374
|
|
325
375
|
# vim: syntax=Ruby
|
data/compare/normalize.rb
CHANGED
@@ -84,6 +84,7 @@ def munge s
|
|
84
84
|
|
85
85
|
"' '", "tSPACE", # needs to be later to avoid bad hits
|
86
86
|
|
87
|
+
"%empty", "none", # newer bison
|
87
88
|
"/* empty */", "none",
|
88
89
|
/^\s*$/, "none",
|
89
90
|
|
@@ -140,6 +141,7 @@ def munge s
|
|
140
141
|
'"do for block"', "kDO_BLOCK",
|
141
142
|
'"do for condition"', "kDO_COND",
|
142
143
|
'"do for lambda"', "kDO_LAMBDA",
|
144
|
+
"tLABEL", "kLABEL",
|
143
145
|
|
144
146
|
# UGH
|
145
147
|
"k_LINE__", "k__LINE__",
|
@@ -155,7 +157,10 @@ def munge s
|
|
155
157
|
/\"(\w+) \(?modifier\)?\"/, proc { |x| "k#{$1.upcase}_MOD" },
|
156
158
|
/\"(\w+)\"/, proc { |x| "k#{$1.upcase}" },
|
157
159
|
|
158
|
-
|
160
|
+
/\$?@(\d+)(\s+|$)/, "", # newer bison
|
161
|
+
|
162
|
+
# TODO: remove for 3.0 work:
|
163
|
+
"lex_ctxt ", "" # 3.0 production that's mostly noise right now
|
159
164
|
]
|
160
165
|
|
161
166
|
renames.each_slice(2) do |(a, b)|
|
data/gauntlet.md
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
# Running the Gauntlet
|
2
|
+
|
3
|
+
## Maintaining a Gem Mirror
|
4
|
+
|
5
|
+
I use rubygems-mirror to keep an archive of all the latest rubygems on
|
6
|
+
an external disk. Here is the config:
|
7
|
+
|
8
|
+
```
|
9
|
+
---
|
10
|
+
- from: https://rubygems.org
|
11
|
+
to: /Volumes/StuffA/gauntlet/mirror
|
12
|
+
parallelism: 10
|
13
|
+
retries: 3
|
14
|
+
delete: true
|
15
|
+
skiperror: true
|
16
|
+
hashdir: true
|
17
|
+
```
|
18
|
+
|
19
|
+
And I update using rake:
|
20
|
+
|
21
|
+
```
|
22
|
+
% cd ~/Work/git/rubygems/rubygems-mirror
|
23
|
+
% git down
|
24
|
+
% rake mirror:latest
|
25
|
+
% /Volumes/StuffA/gauntlet/bin/cleanup.rb
|
26
|
+
```
|
27
|
+
|
28
|
+
This rather quickly updates my mirror to the latest versions of
|
29
|
+
everything and then deletes all old versions. I then run a cleanup
|
30
|
+
script that fixes the file dates to their publication date and deletes
|
31
|
+
any gems that have invalid specs. This can argue with the mirror a
|
32
|
+
bit, but it is pretty minimal (currently ~20 bad gems).
|
33
|
+
|
34
|
+
## Curating an Archive of Ruby Files
|
35
|
+
|
36
|
+
Next, I process the gem mirror into a much more digestible structure
|
37
|
+
using `hash.rb` (TODO: needs a better name):
|
38
|
+
|
39
|
+
```
|
40
|
+
% cd RP
|
41
|
+
% /Volumes/StuffA/gauntlet/bin/unpack_gems.rb
|
42
|
+
... waaaait ...
|
43
|
+
% mv hashed.noindex gauntlet.$(today).noindex
|
44
|
+
% lrztar gauntlet.$(today).noindex
|
45
|
+
% mv gauntlet.$(today).noindex.lrz /Volumes/StuffA/gauntlet/
|
46
|
+
```
|
47
|
+
|
48
|
+
This script filters all the newer gems (TODO: WHY?), unpacks them,
|
49
|
+
finds all the files that look like they're valid ruby, ensures they're
|
50
|
+
valid ruby (using the current version of ruby to compile them), and
|
51
|
+
then moves them into a SHA dir structure that looks something like
|
52
|
+
this:
|
53
|
+
|
54
|
+
```
|
55
|
+
hashed.noindex/a/b/c/<full_file_sha>.rb
|
56
|
+
```
|
57
|
+
|
58
|
+
This removes all duplicates and puts everything in a fairly even,
|
59
|
+
wide, flat directory layout.
|
60
|
+
|
61
|
+
This process takes a very long time, even with a lot of
|
62
|
+
parallelization. There are currently about 160k gems in the mirror.
|
63
|
+
Unpacking, validating, SHA'ing everything is disk and CPU intensive.
|
64
|
+
The `.noindex` extension stops spotlight from indexing the continuous
|
65
|
+
churn of files being unpacked and moved and saves time.
|
66
|
+
|
67
|
+
Finally, I rename and archive it all up (currently using lrztar, but
|
68
|
+
I'm not in love with it).
|
69
|
+
|
70
|
+
### Stats
|
71
|
+
|
72
|
+
```
|
73
|
+
9696 % find gauntlet.$(today).noindex -type f | lc
|
74
|
+
561270
|
75
|
+
3.5G gauntlet.2021-08-06.noindex
|
76
|
+
239M gauntlet.2021-08-06.noindex.tar.lrz
|
77
|
+
```
|
78
|
+
|
79
|
+
So I wind up with a little over half a million unique ruby files to
|
80
|
+
parse. It's about 3.5g but compresses very nicely down to 240m.
|
81
|
+
|
82
|
+
## Running the Gauntlet
|
83
|
+
|
84
|
+
Assuming you're starting from scratch, unpack the archive once:
|
85
|
+
|
86
|
+
```
|
87
|
+
% lrzuntar gauntlet.$(today).noindex.lrz
|
88
|
+
```
|
89
|
+
|
90
|
+
Then, either run a single process (easier to read):
|
91
|
+
|
92
|
+
```
|
93
|
+
% ./gauntlet/bin/gauntlet.rb gauntlet/*.noindex/?
|
94
|
+
```
|
95
|
+
|
96
|
+
Or max out your machine using xargs (note the `-P 16` and choose accordingly):
|
97
|
+
|
98
|
+
```
|
99
|
+
% ls -d gauntlet/*.noindex/?/? | xargs -n 1 -P 16 ./gauntlet/bin/gauntlet.rb
|
100
|
+
```
|
101
|
+
|
102
|
+
In another terminal I usually monitor the progress like so:
|
103
|
+
|
104
|
+
```
|
105
|
+
% while true ; do clear; fd . -t d -t e gauntlet/*.noindex -X rmdir -p 2> /dev/null ; for D in gauntlet/*.noindex/? ; do echo -n "$D: "; fd .rb $D | wc -l ; done ; echo ; sleep 30 ; done
|
106
|
+
```
|
data/lib/rp_extensions.rb
CHANGED
@@ -12,26 +12,24 @@ class Regexp
|
|
12
12
|
end
|
13
13
|
# :startdoc:
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
unless "".respond_to?(:grep) then
|
19
|
-
class String
|
20
|
-
def grep re
|
21
|
-
lines.grep re
|
22
|
-
end
|
15
|
+
class Array
|
16
|
+
def prepend *vals
|
17
|
+
self[0,0] = vals
|
23
18
|
end
|
24
|
-
end
|
19
|
+
end unless [].respond_to?(:prepend)
|
25
20
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
21
|
+
# :stopdoc:
|
22
|
+
class Symbol
|
23
|
+
def end_with? o
|
24
|
+
self.to_s.end_with? o
|
25
|
+
end
|
26
|
+
end unless :woot.respond_to?(:end_with?)
|
27
|
+
# :startdoc:
|
32
28
|
|
33
|
-
|
29
|
+
############################################################
|
30
|
+
# HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK
|
34
31
|
|
32
|
+
class String
|
35
33
|
def clean_caller
|
36
34
|
self.sub(File.dirname(__FILE__), "./lib").sub(/:in.*/, "")
|
37
35
|
end if $DEBUG
|
@@ -40,34 +38,15 @@ end
|
|
40
38
|
require "sexp"
|
41
39
|
|
42
40
|
class Sexp
|
43
|
-
attr_writer :paren
|
41
|
+
attr_writer :paren # TODO: retire
|
44
42
|
|
45
43
|
def paren
|
46
44
|
@paren ||= false
|
47
45
|
end
|
48
46
|
|
49
|
-
def value
|
50
|
-
raise "multi item sexp" if size > 2
|
51
|
-
last
|
52
|
-
end
|
53
|
-
|
54
|
-
def to_sym
|
55
|
-
raise "no: #{self.inspect}.to_sym is a bug"
|
56
|
-
self.value.to_sym
|
57
|
-
end
|
58
|
-
|
59
|
-
alias :add :<<
|
60
|
-
|
61
|
-
def add_all x
|
62
|
-
self.concat x.sexp_body
|
63
|
-
end
|
64
|
-
|
65
47
|
def block_pass?
|
66
48
|
any? { |s| Sexp === s && s.sexp_type == :block_pass }
|
67
49
|
end
|
68
|
-
|
69
|
-
alias :node_type :sexp_type
|
70
|
-
alias :values :sexp_body # TODO: retire
|
71
50
|
end
|
72
51
|
|
73
52
|
# END HACK
|
data/lib/rp_stringscanner.rb
CHANGED
@@ -1,64 +1,33 @@
|
|
1
1
|
require "strscan"
|
2
2
|
|
3
3
|
class RPStringScanner < StringScanner
|
4
|
-
|
5
|
-
# alias :old_getch :getch
|
6
|
-
# def getch
|
7
|
-
# warn({:getch => caller[0]}.inspect)
|
8
|
-
# old_getch
|
9
|
-
# end
|
10
|
-
# end
|
11
|
-
|
12
|
-
if "".respond_to? :encoding then
|
13
|
-
if "".respond_to? :byteslice then
|
14
|
-
def string_to_pos
|
15
|
-
string.byteslice(0, pos)
|
16
|
-
end
|
17
|
-
else
|
18
|
-
def string_to_pos
|
19
|
-
string.bytes.first(pos).pack("c*").force_encoding(string.encoding)
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
def charpos
|
24
|
-
string_to_pos.length
|
25
|
-
end
|
26
|
-
else
|
27
|
-
alias :charpos :pos
|
28
|
-
|
29
|
-
def string_to_pos
|
30
|
-
string[0..pos]
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
def unread_many str # TODO: remove this entirely - we should not need it
|
35
|
-
warn({:unread_many => caller[0]}.inspect) if ENV['TALLY']
|
36
|
-
begin
|
37
|
-
string[charpos, 0] = str
|
38
|
-
rescue IndexError
|
39
|
-
# HACK -- this is a bandaid on a dirty rag on an open festering wound
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
if ENV['DEBUG'] then
|
44
|
-
alias :old_getch :getch
|
4
|
+
if ENV["DEBUG"] || ENV["TALLY"] then
|
45
5
|
def getch
|
46
|
-
c =
|
47
|
-
|
6
|
+
c = super
|
7
|
+
where = caller.drop_while { |s| s =~ /(getch|nextc).$/ }.first
|
8
|
+
where = where.split(/:/).first(2).join(":")
|
9
|
+
if ENV["TALLY"] then
|
10
|
+
d getch:where
|
11
|
+
else
|
12
|
+
d getch:[c, where]
|
13
|
+
end
|
48
14
|
c
|
49
15
|
end
|
50
16
|
|
51
|
-
alias :old_scan :scan
|
52
17
|
def scan re
|
53
|
-
s =
|
54
|
-
where = caller
|
55
|
-
|
18
|
+
s = super
|
19
|
+
where = caller.drop_while { |x| x =~ /scan.$/ }.first
|
20
|
+
where = where.split(/:/).first(2).join(":")
|
21
|
+
if ENV["TALLY"] then
|
22
|
+
d scan:[where]
|
23
|
+
else
|
24
|
+
d scan:[s, where] if s
|
25
|
+
end
|
56
26
|
s
|
57
27
|
end
|
58
|
-
end
|
59
28
|
|
60
|
-
|
61
|
-
|
29
|
+
def d o
|
30
|
+
STDERR.puts o.inspect
|
31
|
+
end
|
62
32
|
end
|
63
33
|
end
|
64
|
-
|