chrislo-sourceclassifier 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest +325 -0
- data/README.textile +51 -0
- data/Rakefile +64 -0
- data/examples/example.rb +22 -0
- data/lib/sourceclassifier.rb +22 -0
- data/lib/trainer.rb +51 -0
- data/sourceclassifier.gemspec +35 -0
- data/sources/gcc/ackermann.gcc-2.gcc +93 -0
- data/sources/gcc/ackermann.gcc-3.gcc +20 -0
- data/sources/gcc/ary.gcc +40 -0
- data/sources/gcc/binarytrees.gcc +136 -0
- data/sources/gcc/binarytrees.gcc-2.gcc +140 -0
- data/sources/gcc/binarytrees.gcc-3.gcc +134 -0
- data/sources/gcc/chameneos.gcc +117 -0
- data/sources/gcc/chameneos.gcc-2.gcc +134 -0
- data/sources/gcc/chameneos.gcc-3.gcc +120 -0
- data/sources/gcc/chameneosredux.gcc +306 -0
- data/sources/gcc/echo.gcc +144 -0
- data/sources/gcc/except.gcc +52 -0
- data/sources/gcc/fannkuch.gcc +105 -0
- data/sources/gcc/fannkuch.gcc-2.gcc +94 -0
- data/sources/gcc/fasta.gcc +158 -0
- data/sources/gcc/fasta.gcc-2.gcc +132 -0
- data/sources/gcc/fasta.gcc-3.gcc +145 -0
- data/sources/gcc/fasta.gcc-4.gcc +141 -0
- data/sources/gcc/fibo.gcc +20 -0
- data/sources/gcc/harmonic.gcc +22 -0
- data/sources/gcc/hash.gcc +31 -0
- data/sources/gcc/hash2.gcc +38 -0
- data/sources/gcc/heapsort.gcc +72 -0
- data/sources/gcc/hello.gcc-2.gcc +12 -0
- data/sources/gcc/hello.gcc-3.gcc +20 -0
- data/sources/gcc/knucleotide.gcc +173 -0
- data/sources/gcc/knucleotide.gcc-2.gcc +168 -0
- data/sources/gcc/lists.gcc +226 -0
- data/sources/gcc/magicsquares.gcc +378 -0
- data/sources/gcc/mandelbrot.gcc +84 -0
- data/sources/gcc/mandelbrot.gcc-2.gcc +63 -0
- data/sources/gcc/mandelbrot.gcc-3.gcc +91 -0
- data/sources/gcc/mandelbrot.gcc-4.gcc +78 -0
- data/sources/gcc/matrix.gcc +66 -0
- data/sources/gcc/message.gcc-2.gcc +72 -0
- data/sources/gcc/message.gcc-3.gcc +64 -0
- data/sources/gcc/methcall.gcc +89 -0
- data/sources/gcc/moments.gcc +120 -0
- data/sources/gcc/nbody.gcc +142 -0
- data/sources/gcc/nbody.gcc-2.gcc +143 -0
- data/sources/gcc/nbody.gcc-3.gcc +142 -0
- data/sources/gcc/nestedloop.gcc +25 -0
- data/sources/gcc/nsieve.gcc +35 -0
- data/sources/gcc/nsievebits.gcc-2.gcc +37 -0
- data/sources/gcc/nsievebits.gcc-3.gcc +53 -0
- data/sources/gcc/objinst.gcc +95 -0
- data/sources/gcc/partialsums.gcc +86 -0
- data/sources/gcc/partialsums.gcc-3.gcc +81 -0
- data/sources/gcc/partialsums.gcc-4.gcc +68 -0
- data/sources/gcc/pidigits.gcc +105 -0
- data/sources/gcc/primes.gcc +75 -0
- data/sources/gcc/prodcons.gcc +86 -0
- data/sources/gcc/random.gcc +29 -0
- data/sources/gcc/raytracer.gcc +125 -0
- data/sources/gcc/raytracer.gcc-2.gcc +181 -0
- data/sources/gcc/recursive.gcc +68 -0
- data/sources/gcc/recursive.gcc-2.gcc +55 -0
- data/sources/gcc/regexdna.gcc-2.gcc +126 -0
- data/sources/gcc/regexmatch.gcc +136 -0
- data/sources/gcc/revcomp.gcc +85 -0
- data/sources/gcc/revcomp.gcc-2.gcc +88 -0
- data/sources/gcc/revcomp.gcc-4.gcc +71 -0
- data/sources/gcc/reversefile.gcc +103 -0
- data/sources/gcc/reversefile.gcc-2.gcc +56 -0
- data/sources/gcc/sieve.gcc +34 -0
- data/sources/gcc/spectralnorm.gcc +54 -0
- data/sources/gcc/spellcheck.gcc +72 -0
- data/sources/gcc/spellcheck.gcc-2.gcc +61 -0
- data/sources/gcc/strcat.gcc +38 -0
- data/sources/gcc/sumcol.gcc-2.gcc +98 -0
- data/sources/gcc/sumcol.gcc-3.gcc +22 -0
- data/sources/gcc/sumcol.gcc-4.gcc +18 -0
- data/sources/gcc/sumcol.gcc-5.gcc +32 -0
- data/sources/gcc/takfp.gcc +23 -0
- data/sources/gcc/tcp-stream.gcc +122 -0
- data/sources/gcc/tcpecho.gcc +122 -0
- data/sources/gcc/tcpecho.gcc-2.gcc +136 -0
- data/sources/gcc/tcprequest.gcc +122 -0
- data/sources/gcc/threadring.gcc +73 -0
- data/sources/gcc/wc.gcc +52 -0
- data/sources/gcc/wc.gcc-2.gcc +40 -0
- data/sources/gcc/wc.gcc-3.gcc +46 -0
- data/sources/gcc/wordfreq.gcc-2.gcc +85 -0
- data/sources/perl/ackermann.perl +28 -0
- data/sources/perl/ackermann.perl-2.perl +25 -0
- data/sources/perl/ackermann.perl-3.perl +20 -0
- data/sources/perl/ackermann.perl-4.perl +18 -0
- data/sources/perl/ary.perl +25 -0
- data/sources/perl/ary.perl-2.perl +23 -0
- data/sources/perl/binarytrees.perl +66 -0
- data/sources/perl/binarytrees.perl-2.perl +71 -0
- data/sources/perl/chameneos.perl +67 -0
- data/sources/perl/echo.perl +81 -0
- data/sources/perl/except.perl +73 -0
- data/sources/perl/fannkuch.perl +44 -0
- data/sources/perl/fannkuch.perl-2.perl +38 -0
- data/sources/perl/fasta.perl +112 -0
- data/sources/perl/fasta.perl-2.perl +135 -0
- data/sources/perl/fasta.perl-4.perl +122 -0
- data/sources/perl/fibo.perl +15 -0
- data/sources/perl/fibo.perl-2.perl +0 -0
- data/sources/perl/fibo.perl-3.perl +0 -0
- data/sources/perl/harmonic.perl +7 -0
- data/sources/perl/hash.perl +23 -0
- data/sources/perl/hash.perl-2.perl +17 -0
- data/sources/perl/hash.perl-3.perl +24 -0
- data/sources/perl/hash2.perl +16 -0
- data/sources/perl/heapsort.perl +65 -0
- data/sources/perl/heapsort.perl-2.perl +0 -0
- data/sources/perl/hello.perl +5 -0
- data/sources/perl/knucleotide.perl-2.perl +30 -0
- data/sources/perl/lists.perl +48 -0
- data/sources/perl/mandelbrot.perl-2.perl +32 -0
- data/sources/perl/matrix.perl +59 -0
- data/sources/perl/matrix.perl-2.perl +0 -0
- data/sources/perl/matrix.perl-3.perl +0 -0
- data/sources/perl/message.perl +27 -0
- data/sources/perl/methcall.perl +66 -0
- data/sources/perl/moments.perl +44 -0
- data/sources/perl/nbody.perl +108 -0
- data/sources/perl/nestedloop.perl +28 -0
- data/sources/perl/nsieve.perl-2.perl +41 -0
- data/sources/perl/nsieve.perl-4.perl +43 -0
- data/sources/perl/nsievebits.perl +37 -0
- data/sources/perl/objinst.perl +73 -0
- data/sources/perl/partialsums.perl-3.perl +31 -0
- data/sources/perl/pidigits.perl +52 -0
- data/sources/perl/pidigits.perl-2.perl +47 -0
- data/sources/perl/process.perl +50 -0
- data/sources/perl/prodcons.perl +47 -0
- data/sources/perl/random.perl-4.perl +17 -0
- data/sources/perl/recursive.perl-2.perl +57 -0
- data/sources/perl/regexdna.perl +48 -0
- data/sources/perl/regexdna.perl-2.perl +43 -0
- data/sources/perl/regexdna.perl-3.perl +50 -0
- data/sources/perl/regexdna.perl-4.perl +49 -0
- data/sources/perl/regexdna.perl-5.perl +42 -0
- data/sources/perl/regexdna.perl-6.perl +43 -0
- data/sources/perl/regexmatch.perl +35 -0
- data/sources/perl/revcomp.perl-2.perl +34 -0
- data/sources/perl/reversefile.perl +8 -0
- data/sources/perl/reversefile.perl-2.perl +0 -0
- data/sources/perl/reversefile.perl-3.perl +0 -0
- data/sources/perl/sieve.perl +23 -0
- data/sources/perl/spectralnorm.perl-2.perl +54 -0
- data/sources/perl/spellcheck.perl +24 -0
- data/sources/perl/strcat.perl +13 -0
- data/sources/perl/strcat.perl-2.perl +0 -0
- data/sources/perl/sumcol.perl +8 -0
- data/sources/perl/takfp.perl +23 -0
- data/sources/perl/takfp.perl-3.perl +20 -0
- data/sources/perl/tcpecho.perl +61 -0
- data/sources/perl/tcprequest.perl +61 -0
- data/sources/perl/tcpstream.perl +61 -0
- data/sources/perl/threadring.perl +55 -0
- data/sources/perl/threadring.perl-2.perl +43 -0
- data/sources/perl/wc.perl +20 -0
- data/sources/perl/wc.perl-2.perl +14 -0
- data/sources/perl/wordfreq.perl +22 -0
- data/sources/perl/wordfreq.perl-3.perl +0 -0
- data/sources/perl/wordfreq.perl3.perl +0 -0
- data/sources/python/ackermann.python +21 -0
- data/sources/python/ary.python +19 -0
- data/sources/python/binarytrees.python +39 -0
- data/sources/python/binarytrees.python-3.python +44 -0
- data/sources/python/chameneos.python-6.python +73 -0
- data/sources/python/chameneosredux.python +126 -0
- data/sources/python/chameneosredux.python-2.python +122 -0
- data/sources/python/dispatch.python +176 -0
- data/sources/python/dispatch.python-2.python +136 -0
- data/sources/python/echo.python +64 -0
- data/sources/python/except.python +62 -0
- data/sources/python/fannkuch.python +50 -0
- data/sources/python/fannkuch.python-2.python +54 -0
- data/sources/python/fasta.python-2.python +79 -0
- data/sources/python/fibo.python +17 -0
- data/sources/python/fibo.python-2.python +0 -0
- data/sources/python/fibo.python-3.python +0 -0
- data/sources/python/harmonic.python-2.python +9 -0
- data/sources/python/hash.python +21 -0
- data/sources/python/hash.python-2.python +0 -0
- data/sources/python/hash2.python +30 -0
- data/sources/python/heapsort.python-3.python +66 -0
- data/sources/python/hello.python +5 -0
- data/sources/python/implicitode.python +231 -0
- data/sources/python/knucleotide.python +55 -0
- data/sources/python/lists.python +44 -0
- data/sources/python/magicsquares.python +145 -0
- data/sources/python/mandelbrot.python +44 -0
- data/sources/python/mandelbrot.python-2.python +35 -0
- data/sources/python/mandelbrot.python-3.python +46 -0
- data/sources/python/matrix.python +34 -0
- data/sources/python/matrix.python-2.python +23 -0
- data/sources/python/message.python +24 -0
- data/sources/python/message.python-2.python +20 -0
- data/sources/python/message.python-3.python +19 -0
- data/sources/python/meteor.python +210 -0
- data/sources/python/meteor.python-2.python +192 -0
- data/sources/python/methcall.python +51 -0
- data/sources/python/moments.python +65 -0
- data/sources/python/nbody.python +123 -0
- data/sources/python/nbody.python-2.python +120 -0
- data/sources/python/nestedloop.python +24 -0
- data/sources/python/nsieve.python +27 -0
- data/sources/python/nsieve.python-2.python +23 -0
- data/sources/python/nsieve.python-4.python +25 -0
- data/sources/python/nsievebits.python +27 -0
- data/sources/python/nsievebits.python-2.python +43 -0
- data/sources/python/objinst.python +53 -0
- data/sources/python/partialsums.python +37 -0
- data/sources/python/partialsums.python-2.python +35 -0
- data/sources/python/partialsums.python-3.python +48 -0
- data/sources/python/pidigits.python +38 -0
- data/sources/python/pidigits.python-3.python +63 -0
- data/sources/python/pidigits.python-4.python +24 -0
- data/sources/python/process.python +51 -0
- data/sources/python/process.python-2.python +133 -0
- data/sources/python/prodcons.python +51 -0
- data/sources/python/prodcons.python-2.python +0 -0
- data/sources/python/random.python +27 -0
- data/sources/python/raytracer.python +203 -0
- data/sources/python/recursive.python +35 -0
- data/sources/python/regexdna.python +39 -0
- data/sources/python/regexdna.python-2.python +34 -0
- data/sources/python/regexmatch.python +36 -0
- data/sources/python/revcomp.python-3.python +31 -0
- data/sources/python/reversefile.python +13 -0
- data/sources/python/reversefile.python-2.python +0 -0
- data/sources/python/reversefile.python-3.python +0 -0
- data/sources/python/sieve.python +50 -0
- data/sources/python/spectralnorm.python-2.python +36 -0
- data/sources/python/spellcheck.python +17 -0
- data/sources/python/strcat.python +35 -0
- data/sources/python/strcat.python-2.python +0 -0
- data/sources/python/sumcol.python-2.python +0 -0
- data/sources/python/sumcol.python-3.python +0 -0
- data/sources/python/takfp.python +19 -0
- data/sources/python/tcpecho.python +67 -0
- data/sources/python/tcprequest.python +67 -0
- data/sources/python/tcpstream.python +67 -0
- data/sources/python/threadring.python +47 -0
- data/sources/python/threadring.python-2.python +40 -0
- data/sources/python/threadring.python-3.python +34 -0
- data/sources/python/wc.python-2.python +19 -0
- data/sources/python/wordfreq.python +43 -0
- data/sources/python/wordfreq.python-2.python +0 -0
- data/sources/python/wordfreq.python-3.python +28 -0
- data/sources/python/wordfreq.python-4.python +38 -0
- data/sources/python/wordfreq.python-5.python +39 -0
- data/sources/ruby/ackermann.ruby +17 -0
- data/sources/ruby/ackermann.ruby-5.ruby +153 -0
- data/sources/ruby/ary.ruby +22 -0
- data/sources/ruby/binarytrees.ruby-2.ruby +55 -0
- data/sources/ruby/chameneos.ruby-2.ruby +71 -0
- data/sources/ruby/dispatch.ruby +114 -0
- data/sources/ruby/echo.ruby +41 -0
- data/sources/ruby/except.ruby +61 -0
- data/sources/ruby/except.ruby-2.ruby +61 -0
- data/sources/ruby/fannkuch.ruby +42 -0
- data/sources/ruby/fasta.ruby +81 -0
- data/sources/ruby/fibo.ruby +15 -0
- data/sources/ruby/harmonic.ruby-2.ruby +15 -0
- data/sources/ruby/hash.ruby +19 -0
- data/sources/ruby/hash2.ruby +23 -0
- data/sources/ruby/heapsort.ruby +55 -0
- data/sources/ruby/hello.ruby +6 -0
- data/sources/ruby/knucleotide.ruby-2.ruby +44 -0
- data/sources/ruby/lists.ruby +46 -0
- data/sources/ruby/mandelbrot.ruby-3.ruby +63 -0
- data/sources/ruby/matrix.ruby +40 -0
- data/sources/ruby/matrix.ruby-2.ruby +30 -0
- data/sources/ruby/message.ruby +29 -0
- data/sources/ruby/message.ruby-2.ruby +24 -0
- data/sources/ruby/meteor.ruby +386 -0
- data/sources/ruby/meteor.ruby-2.ruby +561 -0
- data/sources/ruby/methcall.ruby +58 -0
- data/sources/ruby/methcall.ruby-2.ruby +54 -0
- data/sources/ruby/moments.ruby +64 -0
- data/sources/ruby/nbody.ruby-2.ruby +145 -0
- data/sources/ruby/nestedloop.ruby +22 -0
- data/sources/ruby/nsieve.ruby +36 -0
- data/sources/ruby/nsieve.ruby-2.ruby +25 -0
- data/sources/ruby/nsievebits.ruby-2.ruby +42 -0
- data/sources/ruby/objinst.ruby +58 -0
- data/sources/ruby/partialsums.ruby +39 -0
- data/sources/ruby/pidigits.ruby +92 -0
- data/sources/ruby/pidigits.ruby-2.ruby +109 -0
- data/sources/ruby/prodcons.ruby +41 -0
- data/sources/ruby/random.ruby +17 -0
- data/sources/ruby/recursive.ruby-2.ruby +53 -0
- data/sources/ruby/regexdna.ruby +32 -0
- data/sources/ruby/regexdna.ruby-2.ruby +38 -0
- data/sources/ruby/regexmatch.ruby +33 -0
- data/sources/ruby/revcomp.ruby +28 -0
- data/sources/ruby/reversefile.ruby +7 -0
- data/sources/ruby/sieve.ruby +30 -0
- data/sources/ruby/spectralnorm.ruby +48 -0
- data/sources/ruby/spellcheck.ruby +18 -0
- data/sources/ruby/spellcheck.ruby-2.ruby +0 -0
- data/sources/ruby/strcat.ruby +12 -0
- data/sources/ruby/strcat.ruby-2.ruby +12 -0
- data/sources/ruby/sumcol.ruby +12 -0
- data/sources/ruby/sumcol.ruby-2.ruby +5 -0
- data/sources/ruby/takfp.ruby +15 -0
- data/sources/ruby/tcpecho.ruby +45 -0
- data/sources/ruby/tcprequest.ruby +45 -0
- data/sources/ruby/tcpstream.ruby +45 -0
- data/sources/ruby/threadring.ruby +61 -0
- data/sources/ruby/threadring.ruby-2.ruby +33 -0
- data/sources/ruby/wc.ruby +15 -0
- data/sources/ruby/wordfreq.ruby +17 -0
- data/sources/ruby/wordfreq.ruby2.ruby +0 -0
- data/test/fixtures/sources/gcc/ackermann.gcc-2.gcc +93 -0
- data/test/fixtures/sources/python/ackermann.python +21 -0
- data/test/fixtures/sources/ruby/ackermann.ruby +17 -0
- data/test/test_source_classifier.rb +40 -0
- data/test/test_trainer.rb +34 -0
- data/trainer.bin +1193 -0
- metadata +393 -0
data/Manifest
ADDED
@@ -0,0 +1,325 @@
|
|
1
|
+
examples/example.rb
|
2
|
+
lib/sourceclassifier.rb
|
3
|
+
lib/trainer.rb
|
4
|
+
Rakefile
|
5
|
+
README.textile
|
6
|
+
sourceclassifier.gemspec
|
7
|
+
sources/gcc/ackermann.gcc-2.gcc
|
8
|
+
sources/gcc/ackermann.gcc-3.gcc
|
9
|
+
sources/gcc/ary.gcc
|
10
|
+
sources/gcc/binarytrees.gcc
|
11
|
+
sources/gcc/binarytrees.gcc-2.gcc
|
12
|
+
sources/gcc/binarytrees.gcc-3.gcc
|
13
|
+
sources/gcc/chameneos.gcc
|
14
|
+
sources/gcc/chameneos.gcc-2.gcc
|
15
|
+
sources/gcc/chameneos.gcc-3.gcc
|
16
|
+
sources/gcc/chameneosredux.gcc
|
17
|
+
sources/gcc/echo.gcc
|
18
|
+
sources/gcc/except.gcc
|
19
|
+
sources/gcc/fannkuch.gcc
|
20
|
+
sources/gcc/fannkuch.gcc-2.gcc
|
21
|
+
sources/gcc/fasta.gcc
|
22
|
+
sources/gcc/fasta.gcc-2.gcc
|
23
|
+
sources/gcc/fasta.gcc-3.gcc
|
24
|
+
sources/gcc/fasta.gcc-4.gcc
|
25
|
+
sources/gcc/fibo.gcc
|
26
|
+
sources/gcc/harmonic.gcc
|
27
|
+
sources/gcc/hash.gcc
|
28
|
+
sources/gcc/hash2.gcc
|
29
|
+
sources/gcc/heapsort.gcc
|
30
|
+
sources/gcc/hello.gcc-2.gcc
|
31
|
+
sources/gcc/hello.gcc-3.gcc
|
32
|
+
sources/gcc/knucleotide.gcc
|
33
|
+
sources/gcc/knucleotide.gcc-2.gcc
|
34
|
+
sources/gcc/lists.gcc
|
35
|
+
sources/gcc/magicsquares.gcc
|
36
|
+
sources/gcc/mandelbrot.gcc
|
37
|
+
sources/gcc/mandelbrot.gcc-2.gcc
|
38
|
+
sources/gcc/mandelbrot.gcc-3.gcc
|
39
|
+
sources/gcc/mandelbrot.gcc-4.gcc
|
40
|
+
sources/gcc/matrix.gcc
|
41
|
+
sources/gcc/message.gcc-2.gcc
|
42
|
+
sources/gcc/message.gcc-3.gcc
|
43
|
+
sources/gcc/methcall.gcc
|
44
|
+
sources/gcc/moments.gcc
|
45
|
+
sources/gcc/nbody.gcc
|
46
|
+
sources/gcc/nbody.gcc-2.gcc
|
47
|
+
sources/gcc/nbody.gcc-3.gcc
|
48
|
+
sources/gcc/nestedloop.gcc
|
49
|
+
sources/gcc/nsieve.gcc
|
50
|
+
sources/gcc/nsievebits.gcc-2.gcc
|
51
|
+
sources/gcc/nsievebits.gcc-3.gcc
|
52
|
+
sources/gcc/objinst.gcc
|
53
|
+
sources/gcc/partialsums.gcc
|
54
|
+
sources/gcc/partialsums.gcc-3.gcc
|
55
|
+
sources/gcc/partialsums.gcc-4.gcc
|
56
|
+
sources/gcc/pidigits.gcc
|
57
|
+
sources/gcc/primes.gcc
|
58
|
+
sources/gcc/prodcons.gcc
|
59
|
+
sources/gcc/random.gcc
|
60
|
+
sources/gcc/raytracer.gcc
|
61
|
+
sources/gcc/raytracer.gcc-2.gcc
|
62
|
+
sources/gcc/recursive.gcc
|
63
|
+
sources/gcc/recursive.gcc-2.gcc
|
64
|
+
sources/gcc/regexdna.gcc-2.gcc
|
65
|
+
sources/gcc/regexmatch.gcc
|
66
|
+
sources/gcc/revcomp.gcc
|
67
|
+
sources/gcc/revcomp.gcc-2.gcc
|
68
|
+
sources/gcc/revcomp.gcc-4.gcc
|
69
|
+
sources/gcc/reversefile.gcc
|
70
|
+
sources/gcc/reversefile.gcc-2.gcc
|
71
|
+
sources/gcc/sieve.gcc
|
72
|
+
sources/gcc/spectralnorm.gcc
|
73
|
+
sources/gcc/spellcheck.gcc
|
74
|
+
sources/gcc/spellcheck.gcc-2.gcc
|
75
|
+
sources/gcc/strcat.gcc
|
76
|
+
sources/gcc/sumcol.gcc-2.gcc
|
77
|
+
sources/gcc/sumcol.gcc-3.gcc
|
78
|
+
sources/gcc/sumcol.gcc-4.gcc
|
79
|
+
sources/gcc/sumcol.gcc-5.gcc
|
80
|
+
sources/gcc/takfp.gcc
|
81
|
+
sources/gcc/tcp-stream.gcc
|
82
|
+
sources/gcc/tcpecho.gcc
|
83
|
+
sources/gcc/tcpecho.gcc-2.gcc
|
84
|
+
sources/gcc/tcprequest.gcc
|
85
|
+
sources/gcc/threadring.gcc
|
86
|
+
sources/gcc/wc.gcc
|
87
|
+
sources/gcc/wc.gcc-2.gcc
|
88
|
+
sources/gcc/wc.gcc-3.gcc
|
89
|
+
sources/gcc/wordfreq.gcc-2.gcc
|
90
|
+
sources/perl/ackermann.perl
|
91
|
+
sources/perl/ackermann.perl-2.perl
|
92
|
+
sources/perl/ackermann.perl-3.perl
|
93
|
+
sources/perl/ackermann.perl-4.perl
|
94
|
+
sources/perl/ary.perl
|
95
|
+
sources/perl/ary.perl-2.perl
|
96
|
+
sources/perl/binarytrees.perl
|
97
|
+
sources/perl/binarytrees.perl-2.perl
|
98
|
+
sources/perl/chameneos.perl
|
99
|
+
sources/perl/echo.perl
|
100
|
+
sources/perl/except.perl
|
101
|
+
sources/perl/fannkuch.perl
|
102
|
+
sources/perl/fannkuch.perl-2.perl
|
103
|
+
sources/perl/fasta.perl
|
104
|
+
sources/perl/fasta.perl-2.perl
|
105
|
+
sources/perl/fasta.perl-4.perl
|
106
|
+
sources/perl/fibo.perl
|
107
|
+
sources/perl/fibo.perl-2.perl
|
108
|
+
sources/perl/fibo.perl-3.perl
|
109
|
+
sources/perl/harmonic.perl
|
110
|
+
sources/perl/hash.perl
|
111
|
+
sources/perl/hash.perl-2.perl
|
112
|
+
sources/perl/hash.perl-3.perl
|
113
|
+
sources/perl/hash2.perl
|
114
|
+
sources/perl/heapsort.perl
|
115
|
+
sources/perl/heapsort.perl-2.perl
|
116
|
+
sources/perl/hello.perl
|
117
|
+
sources/perl/knucleotide.perl-2.perl
|
118
|
+
sources/perl/lists.perl
|
119
|
+
sources/perl/mandelbrot.perl-2.perl
|
120
|
+
sources/perl/matrix.perl
|
121
|
+
sources/perl/matrix.perl-2.perl
|
122
|
+
sources/perl/matrix.perl-3.perl
|
123
|
+
sources/perl/message.perl
|
124
|
+
sources/perl/methcall.perl
|
125
|
+
sources/perl/moments.perl
|
126
|
+
sources/perl/nbody.perl
|
127
|
+
sources/perl/nestedloop.perl
|
128
|
+
sources/perl/nsieve.perl-2.perl
|
129
|
+
sources/perl/nsieve.perl-4.perl
|
130
|
+
sources/perl/nsievebits.perl
|
131
|
+
sources/perl/objinst.perl
|
132
|
+
sources/perl/partialsums.perl-3.perl
|
133
|
+
sources/perl/pidigits.perl
|
134
|
+
sources/perl/pidigits.perl-2.perl
|
135
|
+
sources/perl/process.perl
|
136
|
+
sources/perl/prodcons.perl
|
137
|
+
sources/perl/random.perl-4.perl
|
138
|
+
sources/perl/recursive.perl-2.perl
|
139
|
+
sources/perl/regexdna.perl
|
140
|
+
sources/perl/regexdna.perl-2.perl
|
141
|
+
sources/perl/regexdna.perl-3.perl
|
142
|
+
sources/perl/regexdna.perl-4.perl
|
143
|
+
sources/perl/regexdna.perl-5.perl
|
144
|
+
sources/perl/regexdna.perl-6.perl
|
145
|
+
sources/perl/regexmatch.perl
|
146
|
+
sources/perl/revcomp.perl-2.perl
|
147
|
+
sources/perl/reversefile.perl
|
148
|
+
sources/perl/reversefile.perl-2.perl
|
149
|
+
sources/perl/reversefile.perl-3.perl
|
150
|
+
sources/perl/sieve.perl
|
151
|
+
sources/perl/spectralnorm.perl-2.perl
|
152
|
+
sources/perl/spellcheck.perl
|
153
|
+
sources/perl/strcat.perl
|
154
|
+
sources/perl/strcat.perl-2.perl
|
155
|
+
sources/perl/sumcol.perl
|
156
|
+
sources/perl/takfp.perl
|
157
|
+
sources/perl/takfp.perl-3.perl
|
158
|
+
sources/perl/tcpecho.perl
|
159
|
+
sources/perl/tcprequest.perl
|
160
|
+
sources/perl/tcpstream.perl
|
161
|
+
sources/perl/threadring.perl
|
162
|
+
sources/perl/threadring.perl-2.perl
|
163
|
+
sources/perl/wc.perl
|
164
|
+
sources/perl/wc.perl-2.perl
|
165
|
+
sources/perl/wordfreq.perl
|
166
|
+
sources/perl/wordfreq.perl-3.perl
|
167
|
+
sources/perl/wordfreq.perl3.perl
|
168
|
+
sources/python/ackermann.python
|
169
|
+
sources/python/ary.python
|
170
|
+
sources/python/binarytrees.python
|
171
|
+
sources/python/binarytrees.python-3.python
|
172
|
+
sources/python/chameneos.python-6.python
|
173
|
+
sources/python/chameneosredux.python
|
174
|
+
sources/python/chameneosredux.python-2.python
|
175
|
+
sources/python/dispatch.python
|
176
|
+
sources/python/dispatch.python-2.python
|
177
|
+
sources/python/echo.python
|
178
|
+
sources/python/except.python
|
179
|
+
sources/python/fannkuch.python
|
180
|
+
sources/python/fannkuch.python-2.python
|
181
|
+
sources/python/fasta.python-2.python
|
182
|
+
sources/python/fibo.python
|
183
|
+
sources/python/fibo.python-2.python
|
184
|
+
sources/python/fibo.python-3.python
|
185
|
+
sources/python/harmonic.python-2.python
|
186
|
+
sources/python/hash.python
|
187
|
+
sources/python/hash.python-2.python
|
188
|
+
sources/python/hash2.python
|
189
|
+
sources/python/heapsort.python-3.python
|
190
|
+
sources/python/hello.python
|
191
|
+
sources/python/implicitode.python
|
192
|
+
sources/python/knucleotide.python
|
193
|
+
sources/python/lists.python
|
194
|
+
sources/python/magicsquares.python
|
195
|
+
sources/python/mandelbrot.python
|
196
|
+
sources/python/mandelbrot.python-2.python
|
197
|
+
sources/python/mandelbrot.python-3.python
|
198
|
+
sources/python/matrix.python
|
199
|
+
sources/python/matrix.python-2.python
|
200
|
+
sources/python/message.python
|
201
|
+
sources/python/message.python-2.python
|
202
|
+
sources/python/message.python-3.python
|
203
|
+
sources/python/meteor.python
|
204
|
+
sources/python/meteor.python-2.python
|
205
|
+
sources/python/methcall.python
|
206
|
+
sources/python/moments.python
|
207
|
+
sources/python/nbody.python
|
208
|
+
sources/python/nbody.python-2.python
|
209
|
+
sources/python/nestedloop.python
|
210
|
+
sources/python/nsieve.python
|
211
|
+
sources/python/nsieve.python-2.python
|
212
|
+
sources/python/nsieve.python-4.python
|
213
|
+
sources/python/nsievebits.python
|
214
|
+
sources/python/nsievebits.python-2.python
|
215
|
+
sources/python/objinst.python
|
216
|
+
sources/python/partialsums.python
|
217
|
+
sources/python/partialsums.python-2.python
|
218
|
+
sources/python/partialsums.python-3.python
|
219
|
+
sources/python/pidigits.python
|
220
|
+
sources/python/pidigits.python-3.python
|
221
|
+
sources/python/pidigits.python-4.python
|
222
|
+
sources/python/process.python
|
223
|
+
sources/python/process.python-2.python
|
224
|
+
sources/python/prodcons.python
|
225
|
+
sources/python/prodcons.python-2.python
|
226
|
+
sources/python/random.python
|
227
|
+
sources/python/raytracer.python
|
228
|
+
sources/python/recursive.python
|
229
|
+
sources/python/regexdna.python
|
230
|
+
sources/python/regexdna.python-2.python
|
231
|
+
sources/python/regexmatch.python
|
232
|
+
sources/python/revcomp.python-3.python
|
233
|
+
sources/python/reversefile.python
|
234
|
+
sources/python/reversefile.python-2.python
|
235
|
+
sources/python/reversefile.python-3.python
|
236
|
+
sources/python/sieve.python
|
237
|
+
sources/python/spectralnorm.python-2.python
|
238
|
+
sources/python/spellcheck.python
|
239
|
+
sources/python/strcat.python
|
240
|
+
sources/python/strcat.python-2.python
|
241
|
+
sources/python/sumcol.python-2.python
|
242
|
+
sources/python/sumcol.python-3.python
|
243
|
+
sources/python/takfp.python
|
244
|
+
sources/python/tcpecho.python
|
245
|
+
sources/python/tcprequest.python
|
246
|
+
sources/python/tcpstream.python
|
247
|
+
sources/python/threadring.python
|
248
|
+
sources/python/threadring.python-2.python
|
249
|
+
sources/python/threadring.python-3.python
|
250
|
+
sources/python/wc.python-2.python
|
251
|
+
sources/python/wordfreq.python
|
252
|
+
sources/python/wordfreq.python-2.python
|
253
|
+
sources/python/wordfreq.python-3.python
|
254
|
+
sources/python/wordfreq.python-4.python
|
255
|
+
sources/python/wordfreq.python-5.python
|
256
|
+
sources/ruby/ackermann.ruby
|
257
|
+
sources/ruby/ackermann.ruby-5.ruby
|
258
|
+
sources/ruby/ary.ruby
|
259
|
+
sources/ruby/binarytrees.ruby-2.ruby
|
260
|
+
sources/ruby/chameneos.ruby-2.ruby
|
261
|
+
sources/ruby/dispatch.ruby
|
262
|
+
sources/ruby/echo.ruby
|
263
|
+
sources/ruby/except.ruby
|
264
|
+
sources/ruby/except.ruby-2.ruby
|
265
|
+
sources/ruby/fannkuch.ruby
|
266
|
+
sources/ruby/fasta.ruby
|
267
|
+
sources/ruby/fibo.ruby
|
268
|
+
sources/ruby/harmonic.ruby-2.ruby
|
269
|
+
sources/ruby/hash.ruby
|
270
|
+
sources/ruby/hash2.ruby
|
271
|
+
sources/ruby/heapsort.ruby
|
272
|
+
sources/ruby/hello.ruby
|
273
|
+
sources/ruby/knucleotide.ruby-2.ruby
|
274
|
+
sources/ruby/lists.ruby
|
275
|
+
sources/ruby/mandelbrot.ruby-3.ruby
|
276
|
+
sources/ruby/matrix.ruby
|
277
|
+
sources/ruby/matrix.ruby-2.ruby
|
278
|
+
sources/ruby/message.ruby
|
279
|
+
sources/ruby/message.ruby-2.ruby
|
280
|
+
sources/ruby/meteor.ruby
|
281
|
+
sources/ruby/meteor.ruby-2.ruby
|
282
|
+
sources/ruby/methcall.ruby
|
283
|
+
sources/ruby/methcall.ruby-2.ruby
|
284
|
+
sources/ruby/moments.ruby
|
285
|
+
sources/ruby/nbody.ruby-2.ruby
|
286
|
+
sources/ruby/nestedloop.ruby
|
287
|
+
sources/ruby/nsieve.ruby
|
288
|
+
sources/ruby/nsieve.ruby-2.ruby
|
289
|
+
sources/ruby/nsievebits.ruby-2.ruby
|
290
|
+
sources/ruby/objinst.ruby
|
291
|
+
sources/ruby/partialsums.ruby
|
292
|
+
sources/ruby/pidigits.ruby
|
293
|
+
sources/ruby/pidigits.ruby-2.ruby
|
294
|
+
sources/ruby/prodcons.ruby
|
295
|
+
sources/ruby/random.ruby
|
296
|
+
sources/ruby/recursive.ruby-2.ruby
|
297
|
+
sources/ruby/regexdna.ruby
|
298
|
+
sources/ruby/regexdna.ruby-2.ruby
|
299
|
+
sources/ruby/regexmatch.ruby
|
300
|
+
sources/ruby/revcomp.ruby
|
301
|
+
sources/ruby/reversefile.ruby
|
302
|
+
sources/ruby/sieve.ruby
|
303
|
+
sources/ruby/spectralnorm.ruby
|
304
|
+
sources/ruby/spellcheck.ruby
|
305
|
+
sources/ruby/spellcheck.ruby-2.ruby
|
306
|
+
sources/ruby/strcat.ruby
|
307
|
+
sources/ruby/strcat.ruby-2.ruby
|
308
|
+
sources/ruby/sumcol.ruby
|
309
|
+
sources/ruby/sumcol.ruby-2.ruby
|
310
|
+
sources/ruby/takfp.ruby
|
311
|
+
sources/ruby/tcpecho.ruby
|
312
|
+
sources/ruby/tcprequest.ruby
|
313
|
+
sources/ruby/tcpstream.ruby
|
314
|
+
sources/ruby/threadring.ruby
|
315
|
+
sources/ruby/threadring.ruby-2.ruby
|
316
|
+
sources/ruby/wc.ruby
|
317
|
+
sources/ruby/wordfreq.ruby
|
318
|
+
sources/ruby/wordfreq.ruby2.ruby
|
319
|
+
test/fixtures/sources/gcc/ackermann.gcc-2.gcc
|
320
|
+
test/fixtures/sources/python/ackermann.python
|
321
|
+
test/fixtures/sources/ruby/ackermann.ruby
|
322
|
+
test/test_source_classifier.rb
|
323
|
+
test/test_trainer.rb
|
324
|
+
trainer.bin
|
325
|
+
Manifest
|
data/README.textile
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
h1. SourceClassifier
|
2
|
+
|
3
|
+
SourceClassifier identifies programming languages using a Bayesian classifier trained on a corpus generated from the "Computer Language Benchmarks Game":http://shootout.alioth.debian.org/ . It is written in Ruby and available as a gem. To train the classifier to identify new languages, download the sources from github.
|
4
|
+
|
5
|
+
h2. Usage
|
6
|
+
|
7
|
+
First install the gem using github as a source
|
8
|
+
|
9
|
+
$ gem sources -a http://gems.github.com
|
10
|
+
$ sudo gem install chrislo-sourceclassifier
|
11
|
+
|
12
|
+
Then, to use
|
13
|
+
|
14
|
+
<pre>
|
15
|
+
require 'rubygems'
|
16
|
+
require 'sourceclassifier'
|
17
|
+
|
18
|
+
s = SourceClassifier.new
|
19
|
+
|
20
|
+
ruby_text = <<EOT
|
21
|
+
def my_sorting_function(a)
|
22
|
+
a.sort
|
23
|
+
end
|
24
|
+
EOT
|
25
|
+
|
26
|
+
c_text = <<EOT
|
27
|
+
#include <unistd.h>
|
28
|
+
|
29
|
+
int main() {
|
30
|
+
write(1, "hello world\n", 12);
|
31
|
+
return(0);
|
32
|
+
}
|
33
|
+
EOT
|
34
|
+
|
35
|
+
s.identify(ruby_text) #=> Ruby
|
36
|
+
s.identify(c_text) #=> Gcc
|
37
|
+
</pre>
|
38
|
+
|
39
|
+
h2. Training
|
40
|
+
|
41
|
+
Download the sources from github and, in the source directory, run the training rake task
|
42
|
+
|
43
|
+
$ rake train
|
44
|
+
|
45
|
+
In the ./sources directory are subdirectories for each language you wish to be able to identify. Each subdirectory contains examples of programs written in that language. The name of the directory is significant - it is the value returned by the SourceClassifier.identify() method.
|
46
|
+
|
47
|
+
The rake task populate can be used to build these subdirectories from a checkout of the "computer language shootout sources":http://alioth.debian.org/scm/?group_id=30402 but you are free to train the classifier using any available examples.
|
48
|
+
|
49
|
+
h2. Acknowledgments
|
50
|
+
|
51
|
+
This library depends heavily on the great "Classifier":http://classifier.rubyforge.org/ gem by Lucas Carlson and David Fayram II.
|
data/Rakefile
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
require 'rake'
|
4
|
+
require 'rake/testtask'
|
5
|
+
require 'find'
|
6
|
+
require 'fileutils'
|
7
|
+
require 'echoe'
|
8
|
+
|
9
|
+
Echoe.new('sourceclassifier', '0.2.0') do |p|
|
10
|
+
p.description = "Determine the programming language used in a sample"
|
11
|
+
p.url = "http://github.com/chrislo/sourceclassifier/tree/master"
|
12
|
+
p.author = "Chris Lowis"
|
13
|
+
p.email = "chris.lowis@gmail.com"
|
14
|
+
p.ignore_pattern = ["tmp/*", "script/*", "sources/*"]
|
15
|
+
p.runtime_dependencies = ["classifier"]
|
16
|
+
p.development_dependencies = []
|
17
|
+
end
|
18
|
+
|
19
|
+
Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
|
20
|
+
|
21
|
+
module Find
|
22
|
+
def match(*paths)
|
23
|
+
matched = []
|
24
|
+
find(*paths) { |path| matched << path if yield path }
|
25
|
+
return matched
|
26
|
+
end
|
27
|
+
module_function :match
|
28
|
+
end
|
29
|
+
|
30
|
+
SHOOTOUT_CVS_ROOT = '/Users/chris/tmp/shootout-scm-2008-08-24/'
|
31
|
+
|
32
|
+
task :default => [:test_units]
|
33
|
+
|
34
|
+
desc "Run basic tests"
|
35
|
+
Rake::TestTask.new("test_units") { |t|
|
36
|
+
t.pattern = 'test/test_*.rb'
|
37
|
+
t.verbose = false
|
38
|
+
t.warning = false
|
39
|
+
}
|
40
|
+
|
41
|
+
desc "Populate training data directories"
|
42
|
+
task :populate do
|
43
|
+
languages = %w[ruby python gcc perl java javascript]
|
44
|
+
|
45
|
+
languages.each do |language|
|
46
|
+
# create directories
|
47
|
+
target_dir = "./sources/#{language}"
|
48
|
+
FileUtils.mkdir_p target_dir
|
49
|
+
|
50
|
+
# copy source files to appropriate directories
|
51
|
+
Find.match(SHOOTOUT_CVS_ROOT) do |file|
|
52
|
+
this_ext = File.extname(file).downcase
|
53
|
+
if this_ext == ".#{language}"
|
54
|
+
FileUtils.cp file, "#{target_dir}/#{File.basename(file)}"
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
desc "Train using training data directory"
|
61
|
+
task :train do
|
62
|
+
require './lib/trainer.rb'
|
63
|
+
t = Trainer.new('./sources','./')
|
64
|
+
end
|
data/examples/example.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'sourceclassifier'
|
3
|
+
|
4
|
+
s = SourceClassifier.new
|
5
|
+
|
6
|
+
ruby_text = <<EOT
|
7
|
+
def my_sorting_function(a)
|
8
|
+
a.sort
|
9
|
+
end
|
10
|
+
EOT
|
11
|
+
|
12
|
+
c_text = <<EOT
|
13
|
+
#include <unistd.h>
|
14
|
+
|
15
|
+
int main() {
|
16
|
+
write(1, "hello world\n", 12);
|
17
|
+
return(0);
|
18
|
+
}
|
19
|
+
EOT
|
20
|
+
|
21
|
+
s.identify(ruby_text) #=> Ruby
|
22
|
+
s.identify(c_text) #=> Gcc
|