chrislo-sourceclassifier 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (326) hide show
  1. data/Manifest +325 -0
  2. data/README.textile +51 -0
  3. data/Rakefile +64 -0
  4. data/examples/example.rb +22 -0
  5. data/lib/sourceclassifier.rb +22 -0
  6. data/lib/trainer.rb +51 -0
  7. data/sourceclassifier.gemspec +35 -0
  8. data/sources/gcc/ackermann.gcc-2.gcc +93 -0
  9. data/sources/gcc/ackermann.gcc-3.gcc +20 -0
  10. data/sources/gcc/ary.gcc +40 -0
  11. data/sources/gcc/binarytrees.gcc +136 -0
  12. data/sources/gcc/binarytrees.gcc-2.gcc +140 -0
  13. data/sources/gcc/binarytrees.gcc-3.gcc +134 -0
  14. data/sources/gcc/chameneos.gcc +117 -0
  15. data/sources/gcc/chameneos.gcc-2.gcc +134 -0
  16. data/sources/gcc/chameneos.gcc-3.gcc +120 -0
  17. data/sources/gcc/chameneosredux.gcc +306 -0
  18. data/sources/gcc/echo.gcc +144 -0
  19. data/sources/gcc/except.gcc +52 -0
  20. data/sources/gcc/fannkuch.gcc +105 -0
  21. data/sources/gcc/fannkuch.gcc-2.gcc +94 -0
  22. data/sources/gcc/fasta.gcc +158 -0
  23. data/sources/gcc/fasta.gcc-2.gcc +132 -0
  24. data/sources/gcc/fasta.gcc-3.gcc +145 -0
  25. data/sources/gcc/fasta.gcc-4.gcc +141 -0
  26. data/sources/gcc/fibo.gcc +20 -0
  27. data/sources/gcc/harmonic.gcc +22 -0
  28. data/sources/gcc/hash.gcc +31 -0
  29. data/sources/gcc/hash2.gcc +38 -0
  30. data/sources/gcc/heapsort.gcc +72 -0
  31. data/sources/gcc/hello.gcc-2.gcc +12 -0
  32. data/sources/gcc/hello.gcc-3.gcc +20 -0
  33. data/sources/gcc/knucleotide.gcc +173 -0
  34. data/sources/gcc/knucleotide.gcc-2.gcc +168 -0
  35. data/sources/gcc/lists.gcc +226 -0
  36. data/sources/gcc/magicsquares.gcc +378 -0
  37. data/sources/gcc/mandelbrot.gcc +84 -0
  38. data/sources/gcc/mandelbrot.gcc-2.gcc +63 -0
  39. data/sources/gcc/mandelbrot.gcc-3.gcc +91 -0
  40. data/sources/gcc/mandelbrot.gcc-4.gcc +78 -0
  41. data/sources/gcc/matrix.gcc +66 -0
  42. data/sources/gcc/message.gcc-2.gcc +72 -0
  43. data/sources/gcc/message.gcc-3.gcc +64 -0
  44. data/sources/gcc/methcall.gcc +89 -0
  45. data/sources/gcc/moments.gcc +120 -0
  46. data/sources/gcc/nbody.gcc +142 -0
  47. data/sources/gcc/nbody.gcc-2.gcc +143 -0
  48. data/sources/gcc/nbody.gcc-3.gcc +142 -0
  49. data/sources/gcc/nestedloop.gcc +25 -0
  50. data/sources/gcc/nsieve.gcc +35 -0
  51. data/sources/gcc/nsievebits.gcc-2.gcc +37 -0
  52. data/sources/gcc/nsievebits.gcc-3.gcc +53 -0
  53. data/sources/gcc/objinst.gcc +95 -0
  54. data/sources/gcc/partialsums.gcc +86 -0
  55. data/sources/gcc/partialsums.gcc-3.gcc +81 -0
  56. data/sources/gcc/partialsums.gcc-4.gcc +68 -0
  57. data/sources/gcc/pidigits.gcc +105 -0
  58. data/sources/gcc/primes.gcc +75 -0
  59. data/sources/gcc/prodcons.gcc +86 -0
  60. data/sources/gcc/random.gcc +29 -0
  61. data/sources/gcc/raytracer.gcc +125 -0
  62. data/sources/gcc/raytracer.gcc-2.gcc +181 -0
  63. data/sources/gcc/recursive.gcc +68 -0
  64. data/sources/gcc/recursive.gcc-2.gcc +55 -0
  65. data/sources/gcc/regexdna.gcc-2.gcc +126 -0
  66. data/sources/gcc/regexmatch.gcc +136 -0
  67. data/sources/gcc/revcomp.gcc +85 -0
  68. data/sources/gcc/revcomp.gcc-2.gcc +88 -0
  69. data/sources/gcc/revcomp.gcc-4.gcc +71 -0
  70. data/sources/gcc/reversefile.gcc +103 -0
  71. data/sources/gcc/reversefile.gcc-2.gcc +56 -0
  72. data/sources/gcc/sieve.gcc +34 -0
  73. data/sources/gcc/spectralnorm.gcc +54 -0
  74. data/sources/gcc/spellcheck.gcc +72 -0
  75. data/sources/gcc/spellcheck.gcc-2.gcc +61 -0
  76. data/sources/gcc/strcat.gcc +38 -0
  77. data/sources/gcc/sumcol.gcc-2.gcc +98 -0
  78. data/sources/gcc/sumcol.gcc-3.gcc +22 -0
  79. data/sources/gcc/sumcol.gcc-4.gcc +18 -0
  80. data/sources/gcc/sumcol.gcc-5.gcc +32 -0
  81. data/sources/gcc/takfp.gcc +23 -0
  82. data/sources/gcc/tcp-stream.gcc +122 -0
  83. data/sources/gcc/tcpecho.gcc +122 -0
  84. data/sources/gcc/tcpecho.gcc-2.gcc +136 -0
  85. data/sources/gcc/tcprequest.gcc +122 -0
  86. data/sources/gcc/threadring.gcc +73 -0
  87. data/sources/gcc/wc.gcc +52 -0
  88. data/sources/gcc/wc.gcc-2.gcc +40 -0
  89. data/sources/gcc/wc.gcc-3.gcc +46 -0
  90. data/sources/gcc/wordfreq.gcc-2.gcc +85 -0
  91. data/sources/perl/ackermann.perl +28 -0
  92. data/sources/perl/ackermann.perl-2.perl +25 -0
  93. data/sources/perl/ackermann.perl-3.perl +20 -0
  94. data/sources/perl/ackermann.perl-4.perl +18 -0
  95. data/sources/perl/ary.perl +25 -0
  96. data/sources/perl/ary.perl-2.perl +23 -0
  97. data/sources/perl/binarytrees.perl +66 -0
  98. data/sources/perl/binarytrees.perl-2.perl +71 -0
  99. data/sources/perl/chameneos.perl +67 -0
  100. data/sources/perl/echo.perl +81 -0
  101. data/sources/perl/except.perl +73 -0
  102. data/sources/perl/fannkuch.perl +44 -0
  103. data/sources/perl/fannkuch.perl-2.perl +38 -0
  104. data/sources/perl/fasta.perl +112 -0
  105. data/sources/perl/fasta.perl-2.perl +135 -0
  106. data/sources/perl/fasta.perl-4.perl +122 -0
  107. data/sources/perl/fibo.perl +15 -0
  108. data/sources/perl/fibo.perl-2.perl +0 -0
  109. data/sources/perl/fibo.perl-3.perl +0 -0
  110. data/sources/perl/harmonic.perl +7 -0
  111. data/sources/perl/hash.perl +23 -0
  112. data/sources/perl/hash.perl-2.perl +17 -0
  113. data/sources/perl/hash.perl-3.perl +24 -0
  114. data/sources/perl/hash2.perl +16 -0
  115. data/sources/perl/heapsort.perl +65 -0
  116. data/sources/perl/heapsort.perl-2.perl +0 -0
  117. data/sources/perl/hello.perl +5 -0
  118. data/sources/perl/knucleotide.perl-2.perl +30 -0
  119. data/sources/perl/lists.perl +48 -0
  120. data/sources/perl/mandelbrot.perl-2.perl +32 -0
  121. data/sources/perl/matrix.perl +59 -0
  122. data/sources/perl/matrix.perl-2.perl +0 -0
  123. data/sources/perl/matrix.perl-3.perl +0 -0
  124. data/sources/perl/message.perl +27 -0
  125. data/sources/perl/methcall.perl +66 -0
  126. data/sources/perl/moments.perl +44 -0
  127. data/sources/perl/nbody.perl +108 -0
  128. data/sources/perl/nestedloop.perl +28 -0
  129. data/sources/perl/nsieve.perl-2.perl +41 -0
  130. data/sources/perl/nsieve.perl-4.perl +43 -0
  131. data/sources/perl/nsievebits.perl +37 -0
  132. data/sources/perl/objinst.perl +73 -0
  133. data/sources/perl/partialsums.perl-3.perl +31 -0
  134. data/sources/perl/pidigits.perl +52 -0
  135. data/sources/perl/pidigits.perl-2.perl +47 -0
  136. data/sources/perl/process.perl +50 -0
  137. data/sources/perl/prodcons.perl +47 -0
  138. data/sources/perl/random.perl-4.perl +17 -0
  139. data/sources/perl/recursive.perl-2.perl +57 -0
  140. data/sources/perl/regexdna.perl +48 -0
  141. data/sources/perl/regexdna.perl-2.perl +43 -0
  142. data/sources/perl/regexdna.perl-3.perl +50 -0
  143. data/sources/perl/regexdna.perl-4.perl +49 -0
  144. data/sources/perl/regexdna.perl-5.perl +42 -0
  145. data/sources/perl/regexdna.perl-6.perl +43 -0
  146. data/sources/perl/regexmatch.perl +35 -0
  147. data/sources/perl/revcomp.perl-2.perl +34 -0
  148. data/sources/perl/reversefile.perl +8 -0
  149. data/sources/perl/reversefile.perl-2.perl +0 -0
  150. data/sources/perl/reversefile.perl-3.perl +0 -0
  151. data/sources/perl/sieve.perl +23 -0
  152. data/sources/perl/spectralnorm.perl-2.perl +54 -0
  153. data/sources/perl/spellcheck.perl +24 -0
  154. data/sources/perl/strcat.perl +13 -0
  155. data/sources/perl/strcat.perl-2.perl +0 -0
  156. data/sources/perl/sumcol.perl +8 -0
  157. data/sources/perl/takfp.perl +23 -0
  158. data/sources/perl/takfp.perl-3.perl +20 -0
  159. data/sources/perl/tcpecho.perl +61 -0
  160. data/sources/perl/tcprequest.perl +61 -0
  161. data/sources/perl/tcpstream.perl +61 -0
  162. data/sources/perl/threadring.perl +55 -0
  163. data/sources/perl/threadring.perl-2.perl +43 -0
  164. data/sources/perl/wc.perl +20 -0
  165. data/sources/perl/wc.perl-2.perl +14 -0
  166. data/sources/perl/wordfreq.perl +22 -0
  167. data/sources/perl/wordfreq.perl-3.perl +0 -0
  168. data/sources/perl/wordfreq.perl3.perl +0 -0
  169. data/sources/python/ackermann.python +21 -0
  170. data/sources/python/ary.python +19 -0
  171. data/sources/python/binarytrees.python +39 -0
  172. data/sources/python/binarytrees.python-3.python +44 -0
  173. data/sources/python/chameneos.python-6.python +73 -0
  174. data/sources/python/chameneosredux.python +126 -0
  175. data/sources/python/chameneosredux.python-2.python +122 -0
  176. data/sources/python/dispatch.python +176 -0
  177. data/sources/python/dispatch.python-2.python +136 -0
  178. data/sources/python/echo.python +64 -0
  179. data/sources/python/except.python +62 -0
  180. data/sources/python/fannkuch.python +50 -0
  181. data/sources/python/fannkuch.python-2.python +54 -0
  182. data/sources/python/fasta.python-2.python +79 -0
  183. data/sources/python/fibo.python +17 -0
  184. data/sources/python/fibo.python-2.python +0 -0
  185. data/sources/python/fibo.python-3.python +0 -0
  186. data/sources/python/harmonic.python-2.python +9 -0
  187. data/sources/python/hash.python +21 -0
  188. data/sources/python/hash.python-2.python +0 -0
  189. data/sources/python/hash2.python +30 -0
  190. data/sources/python/heapsort.python-3.python +66 -0
  191. data/sources/python/hello.python +5 -0
  192. data/sources/python/implicitode.python +231 -0
  193. data/sources/python/knucleotide.python +55 -0
  194. data/sources/python/lists.python +44 -0
  195. data/sources/python/magicsquares.python +145 -0
  196. data/sources/python/mandelbrot.python +44 -0
  197. data/sources/python/mandelbrot.python-2.python +35 -0
  198. data/sources/python/mandelbrot.python-3.python +46 -0
  199. data/sources/python/matrix.python +34 -0
  200. data/sources/python/matrix.python-2.python +23 -0
  201. data/sources/python/message.python +24 -0
  202. data/sources/python/message.python-2.python +20 -0
  203. data/sources/python/message.python-3.python +19 -0
  204. data/sources/python/meteor.python +210 -0
  205. data/sources/python/meteor.python-2.python +192 -0
  206. data/sources/python/methcall.python +51 -0
  207. data/sources/python/moments.python +65 -0
  208. data/sources/python/nbody.python +123 -0
  209. data/sources/python/nbody.python-2.python +120 -0
  210. data/sources/python/nestedloop.python +24 -0
  211. data/sources/python/nsieve.python +27 -0
  212. data/sources/python/nsieve.python-2.python +23 -0
  213. data/sources/python/nsieve.python-4.python +25 -0
  214. data/sources/python/nsievebits.python +27 -0
  215. data/sources/python/nsievebits.python-2.python +43 -0
  216. data/sources/python/objinst.python +53 -0
  217. data/sources/python/partialsums.python +37 -0
  218. data/sources/python/partialsums.python-2.python +35 -0
  219. data/sources/python/partialsums.python-3.python +48 -0
  220. data/sources/python/pidigits.python +38 -0
  221. data/sources/python/pidigits.python-3.python +63 -0
  222. data/sources/python/pidigits.python-4.python +24 -0
  223. data/sources/python/process.python +51 -0
  224. data/sources/python/process.python-2.python +133 -0
  225. data/sources/python/prodcons.python +51 -0
  226. data/sources/python/prodcons.python-2.python +0 -0
  227. data/sources/python/random.python +27 -0
  228. data/sources/python/raytracer.python +203 -0
  229. data/sources/python/recursive.python +35 -0
  230. data/sources/python/regexdna.python +39 -0
  231. data/sources/python/regexdna.python-2.python +34 -0
  232. data/sources/python/regexmatch.python +36 -0
  233. data/sources/python/revcomp.python-3.python +31 -0
  234. data/sources/python/reversefile.python +13 -0
  235. data/sources/python/reversefile.python-2.python +0 -0
  236. data/sources/python/reversefile.python-3.python +0 -0
  237. data/sources/python/sieve.python +50 -0
  238. data/sources/python/spectralnorm.python-2.python +36 -0
  239. data/sources/python/spellcheck.python +17 -0
  240. data/sources/python/strcat.python +35 -0
  241. data/sources/python/strcat.python-2.python +0 -0
  242. data/sources/python/sumcol.python-2.python +0 -0
  243. data/sources/python/sumcol.python-3.python +0 -0
  244. data/sources/python/takfp.python +19 -0
  245. data/sources/python/tcpecho.python +67 -0
  246. data/sources/python/tcprequest.python +67 -0
  247. data/sources/python/tcpstream.python +67 -0
  248. data/sources/python/threadring.python +47 -0
  249. data/sources/python/threadring.python-2.python +40 -0
  250. data/sources/python/threadring.python-3.python +34 -0
  251. data/sources/python/wc.python-2.python +19 -0
  252. data/sources/python/wordfreq.python +43 -0
  253. data/sources/python/wordfreq.python-2.python +0 -0
  254. data/sources/python/wordfreq.python-3.python +28 -0
  255. data/sources/python/wordfreq.python-4.python +38 -0
  256. data/sources/python/wordfreq.python-5.python +39 -0
  257. data/sources/ruby/ackermann.ruby +17 -0
  258. data/sources/ruby/ackermann.ruby-5.ruby +153 -0
  259. data/sources/ruby/ary.ruby +22 -0
  260. data/sources/ruby/binarytrees.ruby-2.ruby +55 -0
  261. data/sources/ruby/chameneos.ruby-2.ruby +71 -0
  262. data/sources/ruby/dispatch.ruby +114 -0
  263. data/sources/ruby/echo.ruby +41 -0
  264. data/sources/ruby/except.ruby +61 -0
  265. data/sources/ruby/except.ruby-2.ruby +61 -0
  266. data/sources/ruby/fannkuch.ruby +42 -0
  267. data/sources/ruby/fasta.ruby +81 -0
  268. data/sources/ruby/fibo.ruby +15 -0
  269. data/sources/ruby/harmonic.ruby-2.ruby +15 -0
  270. data/sources/ruby/hash.ruby +19 -0
  271. data/sources/ruby/hash2.ruby +23 -0
  272. data/sources/ruby/heapsort.ruby +55 -0
  273. data/sources/ruby/hello.ruby +6 -0
  274. data/sources/ruby/knucleotide.ruby-2.ruby +44 -0
  275. data/sources/ruby/lists.ruby +46 -0
  276. data/sources/ruby/mandelbrot.ruby-3.ruby +63 -0
  277. data/sources/ruby/matrix.ruby +40 -0
  278. data/sources/ruby/matrix.ruby-2.ruby +30 -0
  279. data/sources/ruby/message.ruby +29 -0
  280. data/sources/ruby/message.ruby-2.ruby +24 -0
  281. data/sources/ruby/meteor.ruby +386 -0
  282. data/sources/ruby/meteor.ruby-2.ruby +561 -0
  283. data/sources/ruby/methcall.ruby +58 -0
  284. data/sources/ruby/methcall.ruby-2.ruby +54 -0
  285. data/sources/ruby/moments.ruby +64 -0
  286. data/sources/ruby/nbody.ruby-2.ruby +145 -0
  287. data/sources/ruby/nestedloop.ruby +22 -0
  288. data/sources/ruby/nsieve.ruby +36 -0
  289. data/sources/ruby/nsieve.ruby-2.ruby +25 -0
  290. data/sources/ruby/nsievebits.ruby-2.ruby +42 -0
  291. data/sources/ruby/objinst.ruby +58 -0
  292. data/sources/ruby/partialsums.ruby +39 -0
  293. data/sources/ruby/pidigits.ruby +92 -0
  294. data/sources/ruby/pidigits.ruby-2.ruby +109 -0
  295. data/sources/ruby/prodcons.ruby +41 -0
  296. data/sources/ruby/random.ruby +17 -0
  297. data/sources/ruby/recursive.ruby-2.ruby +53 -0
  298. data/sources/ruby/regexdna.ruby +32 -0
  299. data/sources/ruby/regexdna.ruby-2.ruby +38 -0
  300. data/sources/ruby/regexmatch.ruby +33 -0
  301. data/sources/ruby/revcomp.ruby +28 -0
  302. data/sources/ruby/reversefile.ruby +7 -0
  303. data/sources/ruby/sieve.ruby +30 -0
  304. data/sources/ruby/spectralnorm.ruby +48 -0
  305. data/sources/ruby/spellcheck.ruby +18 -0
  306. data/sources/ruby/spellcheck.ruby-2.ruby +0 -0
  307. data/sources/ruby/strcat.ruby +12 -0
  308. data/sources/ruby/strcat.ruby-2.ruby +12 -0
  309. data/sources/ruby/sumcol.ruby +12 -0
  310. data/sources/ruby/sumcol.ruby-2.ruby +5 -0
  311. data/sources/ruby/takfp.ruby +15 -0
  312. data/sources/ruby/tcpecho.ruby +45 -0
  313. data/sources/ruby/tcprequest.ruby +45 -0
  314. data/sources/ruby/tcpstream.ruby +45 -0
  315. data/sources/ruby/threadring.ruby +61 -0
  316. data/sources/ruby/threadring.ruby-2.ruby +33 -0
  317. data/sources/ruby/wc.ruby +15 -0
  318. data/sources/ruby/wordfreq.ruby +17 -0
  319. data/sources/ruby/wordfreq.ruby2.ruby +0 -0
  320. data/test/fixtures/sources/gcc/ackermann.gcc-2.gcc +93 -0
  321. data/test/fixtures/sources/python/ackermann.python +21 -0
  322. data/test/fixtures/sources/ruby/ackermann.ruby +17 -0
  323. data/test/test_source_classifier.rb +40 -0
  324. data/test/test_trainer.rb +34 -0
  325. data/trainer.bin +1193 -0
  326. metadata +393 -0
data/Manifest ADDED
@@ -0,0 +1,325 @@
1
+ examples/example.rb
2
+ lib/sourceclassifier.rb
3
+ lib/trainer.rb
4
+ Rakefile
5
+ README.textile
6
+ sourceclassifier.gemspec
7
+ sources/gcc/ackermann.gcc-2.gcc
8
+ sources/gcc/ackermann.gcc-3.gcc
9
+ sources/gcc/ary.gcc
10
+ sources/gcc/binarytrees.gcc
11
+ sources/gcc/binarytrees.gcc-2.gcc
12
+ sources/gcc/binarytrees.gcc-3.gcc
13
+ sources/gcc/chameneos.gcc
14
+ sources/gcc/chameneos.gcc-2.gcc
15
+ sources/gcc/chameneos.gcc-3.gcc
16
+ sources/gcc/chameneosredux.gcc
17
+ sources/gcc/echo.gcc
18
+ sources/gcc/except.gcc
19
+ sources/gcc/fannkuch.gcc
20
+ sources/gcc/fannkuch.gcc-2.gcc
21
+ sources/gcc/fasta.gcc
22
+ sources/gcc/fasta.gcc-2.gcc
23
+ sources/gcc/fasta.gcc-3.gcc
24
+ sources/gcc/fasta.gcc-4.gcc
25
+ sources/gcc/fibo.gcc
26
+ sources/gcc/harmonic.gcc
27
+ sources/gcc/hash.gcc
28
+ sources/gcc/hash2.gcc
29
+ sources/gcc/heapsort.gcc
30
+ sources/gcc/hello.gcc-2.gcc
31
+ sources/gcc/hello.gcc-3.gcc
32
+ sources/gcc/knucleotide.gcc
33
+ sources/gcc/knucleotide.gcc-2.gcc
34
+ sources/gcc/lists.gcc
35
+ sources/gcc/magicsquares.gcc
36
+ sources/gcc/mandelbrot.gcc
37
+ sources/gcc/mandelbrot.gcc-2.gcc
38
+ sources/gcc/mandelbrot.gcc-3.gcc
39
+ sources/gcc/mandelbrot.gcc-4.gcc
40
+ sources/gcc/matrix.gcc
41
+ sources/gcc/message.gcc-2.gcc
42
+ sources/gcc/message.gcc-3.gcc
43
+ sources/gcc/methcall.gcc
44
+ sources/gcc/moments.gcc
45
+ sources/gcc/nbody.gcc
46
+ sources/gcc/nbody.gcc-2.gcc
47
+ sources/gcc/nbody.gcc-3.gcc
48
+ sources/gcc/nestedloop.gcc
49
+ sources/gcc/nsieve.gcc
50
+ sources/gcc/nsievebits.gcc-2.gcc
51
+ sources/gcc/nsievebits.gcc-3.gcc
52
+ sources/gcc/objinst.gcc
53
+ sources/gcc/partialsums.gcc
54
+ sources/gcc/partialsums.gcc-3.gcc
55
+ sources/gcc/partialsums.gcc-4.gcc
56
+ sources/gcc/pidigits.gcc
57
+ sources/gcc/primes.gcc
58
+ sources/gcc/prodcons.gcc
59
+ sources/gcc/random.gcc
60
+ sources/gcc/raytracer.gcc
61
+ sources/gcc/raytracer.gcc-2.gcc
62
+ sources/gcc/recursive.gcc
63
+ sources/gcc/recursive.gcc-2.gcc
64
+ sources/gcc/regexdna.gcc-2.gcc
65
+ sources/gcc/regexmatch.gcc
66
+ sources/gcc/revcomp.gcc
67
+ sources/gcc/revcomp.gcc-2.gcc
68
+ sources/gcc/revcomp.gcc-4.gcc
69
+ sources/gcc/reversefile.gcc
70
+ sources/gcc/reversefile.gcc-2.gcc
71
+ sources/gcc/sieve.gcc
72
+ sources/gcc/spectralnorm.gcc
73
+ sources/gcc/spellcheck.gcc
74
+ sources/gcc/spellcheck.gcc-2.gcc
75
+ sources/gcc/strcat.gcc
76
+ sources/gcc/sumcol.gcc-2.gcc
77
+ sources/gcc/sumcol.gcc-3.gcc
78
+ sources/gcc/sumcol.gcc-4.gcc
79
+ sources/gcc/sumcol.gcc-5.gcc
80
+ sources/gcc/takfp.gcc
81
+ sources/gcc/tcp-stream.gcc
82
+ sources/gcc/tcpecho.gcc
83
+ sources/gcc/tcpecho.gcc-2.gcc
84
+ sources/gcc/tcprequest.gcc
85
+ sources/gcc/threadring.gcc
86
+ sources/gcc/wc.gcc
87
+ sources/gcc/wc.gcc-2.gcc
88
+ sources/gcc/wc.gcc-3.gcc
89
+ sources/gcc/wordfreq.gcc-2.gcc
90
+ sources/perl/ackermann.perl
91
+ sources/perl/ackermann.perl-2.perl
92
+ sources/perl/ackermann.perl-3.perl
93
+ sources/perl/ackermann.perl-4.perl
94
+ sources/perl/ary.perl
95
+ sources/perl/ary.perl-2.perl
96
+ sources/perl/binarytrees.perl
97
+ sources/perl/binarytrees.perl-2.perl
98
+ sources/perl/chameneos.perl
99
+ sources/perl/echo.perl
100
+ sources/perl/except.perl
101
+ sources/perl/fannkuch.perl
102
+ sources/perl/fannkuch.perl-2.perl
103
+ sources/perl/fasta.perl
104
+ sources/perl/fasta.perl-2.perl
105
+ sources/perl/fasta.perl-4.perl
106
+ sources/perl/fibo.perl
107
+ sources/perl/fibo.perl-2.perl
108
+ sources/perl/fibo.perl-3.perl
109
+ sources/perl/harmonic.perl
110
+ sources/perl/hash.perl
111
+ sources/perl/hash.perl-2.perl
112
+ sources/perl/hash.perl-3.perl
113
+ sources/perl/hash2.perl
114
+ sources/perl/heapsort.perl
115
+ sources/perl/heapsort.perl-2.perl
116
+ sources/perl/hello.perl
117
+ sources/perl/knucleotide.perl-2.perl
118
+ sources/perl/lists.perl
119
+ sources/perl/mandelbrot.perl-2.perl
120
+ sources/perl/matrix.perl
121
+ sources/perl/matrix.perl-2.perl
122
+ sources/perl/matrix.perl-3.perl
123
+ sources/perl/message.perl
124
+ sources/perl/methcall.perl
125
+ sources/perl/moments.perl
126
+ sources/perl/nbody.perl
127
+ sources/perl/nestedloop.perl
128
+ sources/perl/nsieve.perl-2.perl
129
+ sources/perl/nsieve.perl-4.perl
130
+ sources/perl/nsievebits.perl
131
+ sources/perl/objinst.perl
132
+ sources/perl/partialsums.perl-3.perl
133
+ sources/perl/pidigits.perl
134
+ sources/perl/pidigits.perl-2.perl
135
+ sources/perl/process.perl
136
+ sources/perl/prodcons.perl
137
+ sources/perl/random.perl-4.perl
138
+ sources/perl/recursive.perl-2.perl
139
+ sources/perl/regexdna.perl
140
+ sources/perl/regexdna.perl-2.perl
141
+ sources/perl/regexdna.perl-3.perl
142
+ sources/perl/regexdna.perl-4.perl
143
+ sources/perl/regexdna.perl-5.perl
144
+ sources/perl/regexdna.perl-6.perl
145
+ sources/perl/regexmatch.perl
146
+ sources/perl/revcomp.perl-2.perl
147
+ sources/perl/reversefile.perl
148
+ sources/perl/reversefile.perl-2.perl
149
+ sources/perl/reversefile.perl-3.perl
150
+ sources/perl/sieve.perl
151
+ sources/perl/spectralnorm.perl-2.perl
152
+ sources/perl/spellcheck.perl
153
+ sources/perl/strcat.perl
154
+ sources/perl/strcat.perl-2.perl
155
+ sources/perl/sumcol.perl
156
+ sources/perl/takfp.perl
157
+ sources/perl/takfp.perl-3.perl
158
+ sources/perl/tcpecho.perl
159
+ sources/perl/tcprequest.perl
160
+ sources/perl/tcpstream.perl
161
+ sources/perl/threadring.perl
162
+ sources/perl/threadring.perl-2.perl
163
+ sources/perl/wc.perl
164
+ sources/perl/wc.perl-2.perl
165
+ sources/perl/wordfreq.perl
166
+ sources/perl/wordfreq.perl-3.perl
167
+ sources/perl/wordfreq.perl3.perl
168
+ sources/python/ackermann.python
169
+ sources/python/ary.python
170
+ sources/python/binarytrees.python
171
+ sources/python/binarytrees.python-3.python
172
+ sources/python/chameneos.python-6.python
173
+ sources/python/chameneosredux.python
174
+ sources/python/chameneosredux.python-2.python
175
+ sources/python/dispatch.python
176
+ sources/python/dispatch.python-2.python
177
+ sources/python/echo.python
178
+ sources/python/except.python
179
+ sources/python/fannkuch.python
180
+ sources/python/fannkuch.python-2.python
181
+ sources/python/fasta.python-2.python
182
+ sources/python/fibo.python
183
+ sources/python/fibo.python-2.python
184
+ sources/python/fibo.python-3.python
185
+ sources/python/harmonic.python-2.python
186
+ sources/python/hash.python
187
+ sources/python/hash.python-2.python
188
+ sources/python/hash2.python
189
+ sources/python/heapsort.python-3.python
190
+ sources/python/hello.python
191
+ sources/python/implicitode.python
192
+ sources/python/knucleotide.python
193
+ sources/python/lists.python
194
+ sources/python/magicsquares.python
195
+ sources/python/mandelbrot.python
196
+ sources/python/mandelbrot.python-2.python
197
+ sources/python/mandelbrot.python-3.python
198
+ sources/python/matrix.python
199
+ sources/python/matrix.python-2.python
200
+ sources/python/message.python
201
+ sources/python/message.python-2.python
202
+ sources/python/message.python-3.python
203
+ sources/python/meteor.python
204
+ sources/python/meteor.python-2.python
205
+ sources/python/methcall.python
206
+ sources/python/moments.python
207
+ sources/python/nbody.python
208
+ sources/python/nbody.python-2.python
209
+ sources/python/nestedloop.python
210
+ sources/python/nsieve.python
211
+ sources/python/nsieve.python-2.python
212
+ sources/python/nsieve.python-4.python
213
+ sources/python/nsievebits.python
214
+ sources/python/nsievebits.python-2.python
215
+ sources/python/objinst.python
216
+ sources/python/partialsums.python
217
+ sources/python/partialsums.python-2.python
218
+ sources/python/partialsums.python-3.python
219
+ sources/python/pidigits.python
220
+ sources/python/pidigits.python-3.python
221
+ sources/python/pidigits.python-4.python
222
+ sources/python/process.python
223
+ sources/python/process.python-2.python
224
+ sources/python/prodcons.python
225
+ sources/python/prodcons.python-2.python
226
+ sources/python/random.python
227
+ sources/python/raytracer.python
228
+ sources/python/recursive.python
229
+ sources/python/regexdna.python
230
+ sources/python/regexdna.python-2.python
231
+ sources/python/regexmatch.python
232
+ sources/python/revcomp.python-3.python
233
+ sources/python/reversefile.python
234
+ sources/python/reversefile.python-2.python
235
+ sources/python/reversefile.python-3.python
236
+ sources/python/sieve.python
237
+ sources/python/spectralnorm.python-2.python
238
+ sources/python/spellcheck.python
239
+ sources/python/strcat.python
240
+ sources/python/strcat.python-2.python
241
+ sources/python/sumcol.python-2.python
242
+ sources/python/sumcol.python-3.python
243
+ sources/python/takfp.python
244
+ sources/python/tcpecho.python
245
+ sources/python/tcprequest.python
246
+ sources/python/tcpstream.python
247
+ sources/python/threadring.python
248
+ sources/python/threadring.python-2.python
249
+ sources/python/threadring.python-3.python
250
+ sources/python/wc.python-2.python
251
+ sources/python/wordfreq.python
252
+ sources/python/wordfreq.python-2.python
253
+ sources/python/wordfreq.python-3.python
254
+ sources/python/wordfreq.python-4.python
255
+ sources/python/wordfreq.python-5.python
256
+ sources/ruby/ackermann.ruby
257
+ sources/ruby/ackermann.ruby-5.ruby
258
+ sources/ruby/ary.ruby
259
+ sources/ruby/binarytrees.ruby-2.ruby
260
+ sources/ruby/chameneos.ruby-2.ruby
261
+ sources/ruby/dispatch.ruby
262
+ sources/ruby/echo.ruby
263
+ sources/ruby/except.ruby
264
+ sources/ruby/except.ruby-2.ruby
265
+ sources/ruby/fannkuch.ruby
266
+ sources/ruby/fasta.ruby
267
+ sources/ruby/fibo.ruby
268
+ sources/ruby/harmonic.ruby-2.ruby
269
+ sources/ruby/hash.ruby
270
+ sources/ruby/hash2.ruby
271
+ sources/ruby/heapsort.ruby
272
+ sources/ruby/hello.ruby
273
+ sources/ruby/knucleotide.ruby-2.ruby
274
+ sources/ruby/lists.ruby
275
+ sources/ruby/mandelbrot.ruby-3.ruby
276
+ sources/ruby/matrix.ruby
277
+ sources/ruby/matrix.ruby-2.ruby
278
+ sources/ruby/message.ruby
279
+ sources/ruby/message.ruby-2.ruby
280
+ sources/ruby/meteor.ruby
281
+ sources/ruby/meteor.ruby-2.ruby
282
+ sources/ruby/methcall.ruby
283
+ sources/ruby/methcall.ruby-2.ruby
284
+ sources/ruby/moments.ruby
285
+ sources/ruby/nbody.ruby-2.ruby
286
+ sources/ruby/nestedloop.ruby
287
+ sources/ruby/nsieve.ruby
288
+ sources/ruby/nsieve.ruby-2.ruby
289
+ sources/ruby/nsievebits.ruby-2.ruby
290
+ sources/ruby/objinst.ruby
291
+ sources/ruby/partialsums.ruby
292
+ sources/ruby/pidigits.ruby
293
+ sources/ruby/pidigits.ruby-2.ruby
294
+ sources/ruby/prodcons.ruby
295
+ sources/ruby/random.ruby
296
+ sources/ruby/recursive.ruby-2.ruby
297
+ sources/ruby/regexdna.ruby
298
+ sources/ruby/regexdna.ruby-2.ruby
299
+ sources/ruby/regexmatch.ruby
300
+ sources/ruby/revcomp.ruby
301
+ sources/ruby/reversefile.ruby
302
+ sources/ruby/sieve.ruby
303
+ sources/ruby/spectralnorm.ruby
304
+ sources/ruby/spellcheck.ruby
305
+ sources/ruby/spellcheck.ruby-2.ruby
306
+ sources/ruby/strcat.ruby
307
+ sources/ruby/strcat.ruby-2.ruby
308
+ sources/ruby/sumcol.ruby
309
+ sources/ruby/sumcol.ruby-2.ruby
310
+ sources/ruby/takfp.ruby
311
+ sources/ruby/tcpecho.ruby
312
+ sources/ruby/tcprequest.ruby
313
+ sources/ruby/tcpstream.ruby
314
+ sources/ruby/threadring.ruby
315
+ sources/ruby/threadring.ruby-2.ruby
316
+ sources/ruby/wc.ruby
317
+ sources/ruby/wordfreq.ruby
318
+ sources/ruby/wordfreq.ruby2.ruby
319
+ test/fixtures/sources/gcc/ackermann.gcc-2.gcc
320
+ test/fixtures/sources/python/ackermann.python
321
+ test/fixtures/sources/ruby/ackermann.ruby
322
+ test/test_source_classifier.rb
323
+ test/test_trainer.rb
324
+ trainer.bin
325
+ Manifest
data/README.textile ADDED
@@ -0,0 +1,51 @@
1
+ h1. SourceClassifier
2
+
3
+ Source classifier identifies programming language using a Bayesian classifier trained on a corpus generated from the "Computer Language Benchmarks Game":http://shootout.alioth.debian.org/ . It is written in Ruby and available as a gem. To train the classifier to identify new languages, download the sources from github.
4
+
5
+ h2. Usage
6
+
7
+ First install the gem using github as a source
8
+
9
+ $ gem sources -a http://gems.github.com
10
+ $ sudo gem install chrislo-sourceclassifier
11
+
12
+ Then, to use
13
+
14
+ <pre>
15
+ require 'rubygems'
16
+ require 'sourceclassifier'
17
+
18
+ s = SourceClassifier.new
19
+
20
+ ruby_text = <<EOT
21
+ def my_sorting_function(a)
22
+ a.sort
23
+ end
24
+ EOT
25
+
26
+ c_text = <<EOT
27
+ #include <unistd.h>
28
+
29
+ int main() {
30
+ write(1, "hello world\n", 12);
31
+ return(0);
32
+ }
33
+ EOT
34
+
35
+ s.identify(ruby_text) #=> Ruby
36
+ s.identify(c_text) #=> Gcc
37
+ </pre>
38
+
39
+ h2. Training
40
+
41
+ Download the sources from github and, in the source directory, run the training rake task
42
+
43
+ $ rake train
44
+
45
+ In the ./sources directory are subdirectories for each language you wish to be able to identify. Each subdirectory contains examples of programs written in that language. The name of the directory is significant - it is the value returned by the SourceClassifier.identify() method.
46
+
47
+ The rake task populate can be used to build these subdirectories from a checkout of the "computer language shootout sources":http://alioth.debian.org/scm/?group_id=30402 but you are free to train the classifier using any available examples.
48
+
49
+ h2. Acknowledgments
50
+
51
+ This library depends heavily on the great "Classifier":http://classifier.rubyforge.org/ gem by Lucas Carlson and David Fayram II.
data/Rakefile ADDED
@@ -0,0 +1,64 @@
1
+ require 'rubygems'
2
+
3
+ require 'rake'
4
+ require 'rake/testtask'
5
+ require 'find'
6
+ require 'fileutils'
7
+ require 'echoe'
8
+
9
+ Echoe.new('sourceclassifier', '0.2.0') do |p|
10
+ p.description = "Determine the programming language used in a sample"
11
+ p.url = "http://github.com/chrislo/sourceclassifier/tree/master"
12
+ p.author = "Chris Lowis"
13
+ p.email = "chris.lowis@gmail.com"
14
+ p.ignore_pattern = ["tmp/*", "script/*", "sources/*"]
15
+ p.runtime_dependencies = ["classifier"]
16
+ p.development_dependencies = []
17
+ end
18
+
19
+ Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
20
+
21
+ module Find
22
+ def match(*paths)
23
+ matched = []
24
+ find(*paths) { |path| matched << path if yield path }
25
+ return matched
26
+ end
27
+ module_function :match
28
+ end
29
+
30
+ SHOOTOUT_CVS_ROOT = '/Users/chris/tmp/shootout-scm-2008-08-24/'
31
+
32
+ task :default => [:test_units]
33
+
34
+ desc "Run basic tests"
35
+ Rake::TestTask.new("test_units") { |t|
36
+ t.pattern = 'test/test_*.rb'
37
+ t.verbose = false
38
+ t.warning = false
39
+ }
40
+
41
+ desc "Populate training data directories"
42
+ task :populate do
43
+ languages = %w[ruby python gcc perl java javascript]
44
+
45
+ languages.each do |language|
46
+ # create directories
47
+ target_dir = "./sources/#{language}"
48
+ FileUtils.mkdir_p target_dir
49
+
50
+ # copy source files to appropriate directories
51
+ Find.match(SHOOTOUT_CVS_ROOT) do |file|
52
+ this_ext = File.extname(file).downcase
53
+ if this_ext == ".#{language}"
54
+ FileUtils.cp file, "#{target_dir}/#{File.basename(file)}"
55
+ end
56
+ end
57
+ end
58
+ end
59
+
60
+ desc "Train using training data directory"
61
+ task :train do
62
+ require './lib/trainer.rb'
63
+ t = Trainer.new('./sources','./')
64
+ end
data/examples/example.rb ADDED
@@ -0,0 +1,22 @@
1
+ require 'rubygems'
2
+ require 'sourceclassifier'
3
+
4
+ s = SourceClassifier.new
5
+
6
+ ruby_text = <<EOT
7
+ def my_sorting_function(a)
8
+ a.sort
9
+ end
10
+ EOT
11
+
12
+ c_text = <<EOT
13
+ #include <unistd.h>
14
+
15
+ int main() {
16
+ write(1, "hello world\n", 12);
17
+ return(0);
18
+ }
19
+ EOT
20
+
21
+ s.identify(ruby_text) #=> Ruby
22
+ s.identify(c_text) #=> Gcc