despamilator 0.8 → 1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (646) hide show
  1. data/.rspec +3 -0
  2. data/.rvmrc +1 -0
  3. data/Gemfile +12 -0
  4. data/Gemfile.lock +47 -0
  5. data/History.txt +14 -0
  6. data/Manifest.txt +9 -605
  7. data/README.rdoc +37 -37
  8. data/Rakefile +10 -3
  9. data/despamilator.gemspec +8 -11
  10. data/lib/despamilator.rb +26 -1
  11. data/lib/despamilator/filter.rb +15 -26
  12. data/lib/despamilator/filter/funky_consonant.rb +25 -15
  13. data/lib/despamilator/filter/html_tags.rb +122 -111
  14. data/lib/despamilator/filter/ip_address_url.rb +18 -8
  15. data/lib/despamilator/filter/long_words.rb +20 -10
  16. data/lib/despamilator/filter/naughty_q.rb +24 -14
  17. data/lib/despamilator/filter/naughty_words.rb +25 -16
  18. data/lib/despamilator/filter/numbers_and_words.rb +39 -29
  19. data/lib/despamilator/filter/script_tag.rb +18 -10
  20. data/lib/despamilator/filter/shouting.rb +25 -15
  21. data/lib/despamilator/filter/square_brackets.rb +19 -9
  22. data/lib/despamilator/filter/urls.rb +24 -8
  23. data/lib/despamilator/filter_base.rb +54 -9
  24. data/spec/despamilator_spec.rb +0 -2
  25. data/spec/filter_base_spec.rb +30 -0
  26. data/spec/filters/funky_consonant_spec.rb +6 -36
  27. data/spec/filters/html_tags_spec.rb +120 -138
  28. data/spec/filters/ip_address_url_spec.rb +6 -24
  29. data/spec/filters/long_words_spec.rb +6 -29
  30. data/spec/filters/naughty_q_spec.rb +6 -34
  31. data/spec/filters/naughty_words_spec.rb +6 -34
  32. data/spec/filters/numbers_and_words_spec.rb +21 -46
  33. data/spec/filters/script_tag_spec.rb +10 -20
  34. data/spec/filters/shouting_spec.rb +28 -33
  35. data/spec/filters/square_brackets_spec.rb +6 -30
  36. data/spec/filters/urls_spec.rb +6 -34
  37. data/spec/helpers/corpus_helper.rb +5 -0
  38. data/spec/helpers/filter_helper.rb +59 -0
  39. data/spec/helpers/spec_helper.rb +6 -0
  40. data/tasks/test.rake +6 -0
  41. metadata +19 -611
  42. data/lib/despamilator/validation.rb +0 -12
  43. data/spec/clean_corpus/101.txt.gz +0 -0
  44. data/spec/clean_corpus/103.txt.gz +0 -0
  45. data/spec/clean_corpus/105.txt.gz +0 -0
  46. data/spec/clean_corpus/107.txt.gz +0 -0
  47. data/spec/clean_corpus/109.txt.gz +0 -0
  48. data/spec/clean_corpus/111.txt.gz +0 -0
  49. data/spec/clean_corpus/113.txt.gz +0 -0
  50. data/spec/clean_corpus/115.txt.gz +0 -0
  51. data/spec/clean_corpus/117.txt.gz +0 -0
  52. data/spec/clean_corpus/119.txt.gz +0 -0
  53. data/spec/clean_corpus/121.txt.gz +0 -0
  54. data/spec/clean_corpus/123.txt.gz +0 -0
  55. data/spec/clean_corpus/125.txt.gz +0 -0
  56. data/spec/clean_corpus/127.txt.gz +0 -0
  57. data/spec/clean_corpus/129.txt.gz +0 -0
  58. data/spec/clean_corpus/131.txt.gz +0 -0
  59. data/spec/clean_corpus/133.txt.gz +0 -0
  60. data/spec/clean_corpus/135.txt.gz +0 -0
  61. data/spec/clean_corpus/137.txt.gz +0 -0
  62. data/spec/clean_corpus/139.txt.gz +0 -0
  63. data/spec/clean_corpus/141.txt.gz +0 -0
  64. data/spec/clean_corpus/143.txt.gz +0 -0
  65. data/spec/clean_corpus/145.txt.gz +0 -0
  66. data/spec/clean_corpus/147.txt.gz +0 -0
  67. data/spec/clean_corpus/149.txt.gz +0 -0
  68. data/spec/clean_corpus/151.txt.gz +0 -0
  69. data/spec/clean_corpus/153.txt.gz +0 -0
  70. data/spec/clean_corpus/155.txt.gz +0 -0
  71. data/spec/clean_corpus/157.txt.gz +0 -0
  72. data/spec/clean_corpus/159.txt.gz +0 -0
  73. data/spec/clean_corpus/161.txt.gz +0 -0
  74. data/spec/clean_corpus/163.txt.gz +0 -0
  75. data/spec/clean_corpus/165.txt.gz +0 -0
  76. data/spec/clean_corpus/167.txt.gz +0 -0
  77. data/spec/clean_corpus/169.txt.gz +0 -0
  78. data/spec/clean_corpus/171.txt.gz +0 -0
  79. data/spec/clean_corpus/173.txt.gz +0 -0
  80. data/spec/clean_corpus/175.txt.gz +0 -0
  81. data/spec/clean_corpus/177.txt.gz +0 -0
  82. data/spec/clean_corpus/179.txt.gz +0 -0
  83. data/spec/clean_corpus/18.txt.gz +0 -0
  84. data/spec/clean_corpus/181.txt.gz +0 -0
  85. data/spec/clean_corpus/183.txt.gz +0 -0
  86. data/spec/clean_corpus/185.txt.gz +0 -0
  87. data/spec/clean_corpus/187.txt.gz +0 -0
  88. data/spec/clean_corpus/189.txt.gz +0 -0
  89. data/spec/clean_corpus/191.txt.gz +0 -0
  90. data/spec/clean_corpus/193.txt.gz +0 -0
  91. data/spec/clean_corpus/195.txt.gz +0 -0
  92. data/spec/clean_corpus/197.txt.gz +0 -0
  93. data/spec/clean_corpus/199.txt.gz +0 -0
  94. data/spec/clean_corpus/20.txt.gz +0 -0
  95. data/spec/clean_corpus/201.txt.gz +0 -0
  96. data/spec/clean_corpus/203.txt.gz +0 -0
  97. data/spec/clean_corpus/205.txt.gz +0 -0
  98. data/spec/clean_corpus/207.txt.gz +0 -0
  99. data/spec/clean_corpus/209.txt.gz +0 -0
  100. data/spec/clean_corpus/211.txt.gz +0 -0
  101. data/spec/clean_corpus/213.txt.gz +0 -0
  102. data/spec/clean_corpus/215.txt.gz +0 -0
  103. data/spec/clean_corpus/217.txt.gz +0 -0
  104. data/spec/clean_corpus/219.txt.gz +0 -0
  105. data/spec/clean_corpus/22.txt.gz +0 -0
  106. data/spec/clean_corpus/221.txt.gz +0 -0
  107. data/spec/clean_corpus/223.txt.gz +0 -0
  108. data/spec/clean_corpus/225.txt.gz +0 -0
  109. data/spec/clean_corpus/24.txt.gz +0 -0
  110. data/spec/clean_corpus/26.txt.gz +0 -0
  111. data/spec/clean_corpus/27.txt.gz +0 -0
  112. data/spec/clean_corpus/29.txt.gz +0 -0
  113. data/spec/clean_corpus/31.txt.gz +0 -0
  114. data/spec/clean_corpus/33.txt.gz +0 -0
  115. data/spec/clean_corpus/35.txt.gz +0 -0
  116. data/spec/clean_corpus/37.txt.gz +0 -0
  117. data/spec/clean_corpus/39.txt.gz +0 -0
  118. data/spec/clean_corpus/41.txt.gz +0 -0
  119. data/spec/clean_corpus/43.txt.gz +0 -0
  120. data/spec/clean_corpus/45.txt.gz +0 -0
  121. data/spec/clean_corpus/47.txt.gz +0 -0
  122. data/spec/clean_corpus/49.txt.gz +0 -0
  123. data/spec/clean_corpus/51.txt.gz +0 -0
  124. data/spec/clean_corpus/53.txt.gz +0 -0
  125. data/spec/clean_corpus/55.txt.gz +0 -0
  126. data/spec/clean_corpus/57.txt.gz +0 -0
  127. data/spec/clean_corpus/59.txt.gz +0 -0
  128. data/spec/clean_corpus/61.txt.gz +0 -0
  129. data/spec/clean_corpus/63.txt.gz +0 -0
  130. data/spec/clean_corpus/65.txt.gz +0 -0
  131. data/spec/clean_corpus/67.txt.gz +0 -0
  132. data/spec/clean_corpus/69.txt.gz +0 -0
  133. data/spec/clean_corpus/71.txt.gz +0 -0
  134. data/spec/clean_corpus/73.txt.gz +0 -0
  135. data/spec/clean_corpus/75.txt.gz +0 -0
  136. data/spec/clean_corpus/77.txt.gz +0 -0
  137. data/spec/clean_corpus/79.txt.gz +0 -0
  138. data/spec/clean_corpus/81.txt.gz +0 -0
  139. data/spec/clean_corpus/83.txt.gz +0 -0
  140. data/spec/clean_corpus/85.txt.gz +0 -0
  141. data/spec/clean_corpus/87.txt.gz +0 -0
  142. data/spec/clean_corpus/89.txt.gz +0 -0
  143. data/spec/clean_corpus/91.txt.gz +0 -0
  144. data/spec/clean_corpus/93.txt.gz +0 -0
  145. data/spec/clean_corpus/95.txt.gz +0 -0
  146. data/spec/clean_corpus/97.txt.gz +0 -0
  147. data/spec/clean_corpus/99.txt.gz +0 -0
  148. data/spec/clean_corpus_spec.rb +0 -11
  149. data/spec/despamilator_validation_spec.rb +0 -27
  150. data/spec/spam_corpus/0.txt.gz +0 -0
  151. data/spec/spam_corpus/1.txt.gz +0 -0
  152. data/spec/spam_corpus/10.txt.gz +0 -0
  153. data/spec/spam_corpus/100.txt.gz +0 -0
  154. data/spec/spam_corpus/102.txt.gz +0 -0
  155. data/spec/spam_corpus/104.txt.gz +0 -0
  156. data/spec/spam_corpus/106.txt.gz +0 -0
  157. data/spec/spam_corpus/108.txt.gz +0 -0
  158. data/spec/spam_corpus/11.txt.gz +0 -0
  159. data/spec/spam_corpus/110.txt.gz +0 -0
  160. data/spec/spam_corpus/112.txt.gz +0 -0
  161. data/spec/spam_corpus/114.txt.gz +0 -0
  162. data/spec/spam_corpus/116.txt.gz +0 -0
  163. data/spec/spam_corpus/118.txt.gz +0 -0
  164. data/spec/spam_corpus/12.txt.gz +0 -0
  165. data/spec/spam_corpus/120.txt.gz +0 -0
  166. data/spec/spam_corpus/122.txt.gz +0 -0
  167. data/spec/spam_corpus/124.txt.gz +0 -0
  168. data/spec/spam_corpus/126.txt.gz +0 -0
  169. data/spec/spam_corpus/128.txt.gz +0 -0
  170. data/spec/spam_corpus/13.txt.gz +0 -0
  171. data/spec/spam_corpus/130.txt.gz +0 -0
  172. data/spec/spam_corpus/132.txt.gz +0 -0
  173. data/spec/spam_corpus/134.txt.gz +0 -0
  174. data/spec/spam_corpus/136.txt.gz +0 -0
  175. data/spec/spam_corpus/138.txt.gz +0 -0
  176. data/spec/spam_corpus/14.txt.gz +0 -0
  177. data/spec/spam_corpus/140.txt.gz +0 -0
  178. data/spec/spam_corpus/142.txt.gz +0 -0
  179. data/spec/spam_corpus/144.txt.gz +0 -0
  180. data/spec/spam_corpus/146.txt.gz +0 -0
  181. data/spec/spam_corpus/148.txt.gz +0 -0
  182. data/spec/spam_corpus/15.txt.gz +0 -0
  183. data/spec/spam_corpus/150.txt.gz +0 -0
  184. data/spec/spam_corpus/152.txt.gz +0 -0
  185. data/spec/spam_corpus/154.txt.gz +0 -0
  186. data/spec/spam_corpus/156.txt.gz +0 -0
  187. data/spec/spam_corpus/158.txt.gz +0 -0
  188. data/spec/spam_corpus/16.txt.gz +0 -0
  189. data/spec/spam_corpus/160.txt.gz +0 -0
  190. data/spec/spam_corpus/162.txt.gz +0 -0
  191. data/spec/spam_corpus/164.txt.gz +0 -0
  192. data/spec/spam_corpus/166.txt.gz +0 -0
  193. data/spec/spam_corpus/168.txt.gz +0 -0
  194. data/spec/spam_corpus/170.txt.gz +0 -0
  195. data/spec/spam_corpus/172.txt.gz +0 -0
  196. data/spec/spam_corpus/174.txt.gz +0 -0
  197. data/spec/spam_corpus/176.txt.gz +0 -0
  198. data/spec/spam_corpus/178.txt.gz +0 -0
  199. data/spec/spam_corpus/180.txt.gz +0 -0
  200. data/spec/spam_corpus/182.txt.gz +0 -0
  201. data/spec/spam_corpus/184.txt.gz +0 -0
  202. data/spec/spam_corpus/186.txt.gz +0 -0
  203. data/spec/spam_corpus/188.txt.gz +0 -0
  204. data/spec/spam_corpus/190.txt.gz +0 -0
  205. data/spec/spam_corpus/192.txt.gz +0 -0
  206. data/spec/spam_corpus/194.txt.gz +0 -0
  207. data/spec/spam_corpus/196.txt.gz +0 -0
  208. data/spec/spam_corpus/198.txt.gz +0 -0
  209. data/spec/spam_corpus/2.txt.gz +0 -0
  210. data/spec/spam_corpus/200.txt.gz +0 -0
  211. data/spec/spam_corpus/202.txt.gz +0 -0
  212. data/spec/spam_corpus/204.txt.gz +0 -0
  213. data/spec/spam_corpus/206.txt.gz +0 -0
  214. data/spec/spam_corpus/208.txt.gz +0 -0
  215. data/spec/spam_corpus/210.txt.gz +0 -0
  216. data/spec/spam_corpus/212.txt.gz +0 -0
  217. data/spec/spam_corpus/214.txt.gz +0 -0
  218. data/spec/spam_corpus/216.txt.gz +0 -0
  219. data/spec/spam_corpus/218.txt.gz +0 -0
  220. data/spec/spam_corpus/220.txt.gz +0 -0
  221. data/spec/spam_corpus/222.txt.gz +0 -0
  222. data/spec/spam_corpus/224.txt.gz +0 -0
  223. data/spec/spam_corpus/226.txt.gz +0 -0
  224. data/spec/spam_corpus/228.txt.gz +0 -0
  225. data/spec/spam_corpus/230.txt.gz +0 -0
  226. data/spec/spam_corpus/232.txt.gz +0 -0
  227. data/spec/spam_corpus/234.txt.gz +0 -0
  228. data/spec/spam_corpus/236.txt.gz +0 -0
  229. data/spec/spam_corpus/238.txt.gz +0 -0
  230. data/spec/spam_corpus/240.txt.gz +0 -0
  231. data/spec/spam_corpus/242.txt.gz +0 -0
  232. data/spec/spam_corpus/244.txt.gz +0 -0
  233. data/spec/spam_corpus/246.txt.gz +0 -0
  234. data/spec/spam_corpus/248.txt.gz +0 -0
  235. data/spec/spam_corpus/250.txt.gz +0 -0
  236. data/spec/spam_corpus/252.txt.gz +0 -0
  237. data/spec/spam_corpus/254.txt.gz +0 -0
  238. data/spec/spam_corpus/256.txt.gz +0 -0
  239. data/spec/spam_corpus/258.txt.gz +0 -0
  240. data/spec/spam_corpus/260.txt.gz +0 -0
  241. data/spec/spam_corpus/262.txt.gz +0 -0
  242. data/spec/spam_corpus/264.txt.gz +0 -0
  243. data/spec/spam_corpus/266.txt.gz +0 -0
  244. data/spec/spam_corpus/268.txt.gz +0 -0
  245. data/spec/spam_corpus/270.txt.gz +0 -0
  246. data/spec/spam_corpus/272.txt.gz +0 -0
  247. data/spec/spam_corpus/274.txt.gz +0 -0
  248. data/spec/spam_corpus/276.txt.gz +0 -0
  249. data/spec/spam_corpus/278.txt.gz +0 -0
  250. data/spec/spam_corpus/28.txt.gz +0 -0
  251. data/spec/spam_corpus/280.txt.gz +0 -0
  252. data/spec/spam_corpus/282.txt.gz +0 -0
  253. data/spec/spam_corpus/284.txt.gz +0 -0
  254. data/spec/spam_corpus/286.txt.gz +0 -0
  255. data/spec/spam_corpus/288.txt.gz +0 -0
  256. data/spec/spam_corpus/290.txt.gz +0 -0
  257. data/spec/spam_corpus/292.txt.gz +0 -0
  258. data/spec/spam_corpus/294.txt.gz +0 -0
  259. data/spec/spam_corpus/296.txt.gz +0 -0
  260. data/spec/spam_corpus/298.txt.gz +0 -0
  261. data/spec/spam_corpus/3.txt.gz +0 -0
  262. data/spec/spam_corpus/30.txt.gz +0 -0
  263. data/spec/spam_corpus/300.txt.gz +0 -0
  264. data/spec/spam_corpus/302.txt.gz +0 -0
  265. data/spec/spam_corpus/304.txt.gz +0 -0
  266. data/spec/spam_corpus/306.txt.gz +0 -0
  267. data/spec/spam_corpus/308.txt.gz +0 -0
  268. data/spec/spam_corpus/310.txt.gz +0 -0
  269. data/spec/spam_corpus/312.txt.gz +0 -0
  270. data/spec/spam_corpus/314.txt.gz +0 -0
  271. data/spec/spam_corpus/316.txt.gz +0 -0
  272. data/spec/spam_corpus/318.txt.gz +0 -0
  273. data/spec/spam_corpus/32.txt.gz +0 -0
  274. data/spec/spam_corpus/320.txt.gz +0 -0
  275. data/spec/spam_corpus/322.txt.gz +0 -0
  276. data/spec/spam_corpus/324.txt.gz +0 -0
  277. data/spec/spam_corpus/326.txt.gz +0 -0
  278. data/spec/spam_corpus/328.txt.gz +0 -0
  279. data/spec/spam_corpus/330.txt.gz +0 -0
  280. data/spec/spam_corpus/332.txt.gz +0 -0
  281. data/spec/spam_corpus/334.txt.gz +0 -0
  282. data/spec/spam_corpus/336.txt.gz +0 -0
  283. data/spec/spam_corpus/338.txt.gz +0 -0
  284. data/spec/spam_corpus/34.txt.gz +0 -0
  285. data/spec/spam_corpus/340.txt.gz +0 -0
  286. data/spec/spam_corpus/342.txt.gz +0 -0
  287. data/spec/spam_corpus/344.txt.gz +0 -0
  288. data/spec/spam_corpus/346.txt.gz +0 -0
  289. data/spec/spam_corpus/348.txt.gz +0 -0
  290. data/spec/spam_corpus/350.txt.gz +0 -0
  291. data/spec/spam_corpus/352.txt.gz +0 -0
  292. data/spec/spam_corpus/354.txt.gz +0 -0
  293. data/spec/spam_corpus/356.txt.gz +0 -0
  294. data/spec/spam_corpus/358.txt.gz +0 -0
  295. data/spec/spam_corpus/36.txt.gz +0 -0
  296. data/spec/spam_corpus/360.txt.gz +0 -0
  297. data/spec/spam_corpus/362.txt.gz +0 -0
  298. data/spec/spam_corpus/364.txt.gz +0 -0
  299. data/spec/spam_corpus/366.txt.gz +0 -0
  300. data/spec/spam_corpus/368.txt.gz +0 -0
  301. data/spec/spam_corpus/370.txt.gz +0 -0
  302. data/spec/spam_corpus/372.txt.gz +0 -0
  303. data/spec/spam_corpus/374.txt.gz +0 -0
  304. data/spec/spam_corpus/376.txt.gz +0 -0
  305. data/spec/spam_corpus/378.txt.gz +0 -0
  306. data/spec/spam_corpus/38.txt.gz +0 -0
  307. data/spec/spam_corpus/380.txt.gz +0 -0
  308. data/spec/spam_corpus/382.txt.gz +0 -0
  309. data/spec/spam_corpus/384.txt.gz +0 -0
  310. data/spec/spam_corpus/386.txt.gz +0 -0
  311. data/spec/spam_corpus/388.txt.gz +0 -0
  312. data/spec/spam_corpus/390.txt.gz +0 -0
  313. data/spec/spam_corpus/392.txt.gz +0 -0
  314. data/spec/spam_corpus/394.txt.gz +0 -0
  315. data/spec/spam_corpus/396.txt.gz +0 -0
  316. data/spec/spam_corpus/398.txt.gz +0 -0
  317. data/spec/spam_corpus/4.txt.gz +0 -0
  318. data/spec/spam_corpus/40.txt.gz +0 -0
  319. data/spec/spam_corpus/400.txt.gz +0 -0
  320. data/spec/spam_corpus/402.txt.gz +0 -0
  321. data/spec/spam_corpus/404.txt.gz +0 -0
  322. data/spec/spam_corpus/406.txt.gz +0 -0
  323. data/spec/spam_corpus/408.txt.gz +0 -0
  324. data/spec/spam_corpus/410.txt.gz +0 -0
  325. data/spec/spam_corpus/412.txt.gz +0 -0
  326. data/spec/spam_corpus/414.txt.gz +0 -0
  327. data/spec/spam_corpus/416.txt.gz +0 -0
  328. data/spec/spam_corpus/418.txt.gz +0 -0
  329. data/spec/spam_corpus/42.txt.gz +0 -0
  330. data/spec/spam_corpus/420.txt.gz +0 -0
  331. data/spec/spam_corpus/422.txt.gz +0 -0
  332. data/spec/spam_corpus/424.txt.gz +0 -0
  333. data/spec/spam_corpus/426.txt.gz +0 -0
  334. data/spec/spam_corpus/428.txt.gz +0 -0
  335. data/spec/spam_corpus/430.txt.gz +0 -0
  336. data/spec/spam_corpus/432.txt.gz +0 -0
  337. data/spec/spam_corpus/434.txt.gz +0 -0
  338. data/spec/spam_corpus/436.txt.gz +0 -0
  339. data/spec/spam_corpus/438.txt.gz +0 -0
  340. data/spec/spam_corpus/44.txt.gz +0 -0
  341. data/spec/spam_corpus/440.txt.gz +0 -0
  342. data/spec/spam_corpus/442.txt.gz +0 -0
  343. data/spec/spam_corpus/444.txt.gz +0 -0
  344. data/spec/spam_corpus/446.txt.gz +0 -0
  345. data/spec/spam_corpus/448.txt.gz +0 -0
  346. data/spec/spam_corpus/450.txt.gz +0 -0
  347. data/spec/spam_corpus/452.txt.gz +0 -0
  348. data/spec/spam_corpus/454.txt.gz +0 -0
  349. data/spec/spam_corpus/456.txt.gz +0 -0
  350. data/spec/spam_corpus/458.txt.gz +0 -0
  351. data/spec/spam_corpus/46.txt.gz +0 -0
  352. data/spec/spam_corpus/460.txt.gz +0 -0
  353. data/spec/spam_corpus/462.txt.gz +0 -0
  354. data/spec/spam_corpus/464.txt.gz +0 -0
  355. data/spec/spam_corpus/466.txt.gz +0 -0
  356. data/spec/spam_corpus/468.txt.gz +0 -0
  357. data/spec/spam_corpus/470.txt.gz +0 -0
  358. data/spec/spam_corpus/472.txt.gz +0 -0
  359. data/spec/spam_corpus/474.txt.gz +0 -0
  360. data/spec/spam_corpus/476.txt.gz +0 -0
  361. data/spec/spam_corpus/478.txt.gz +0 -0
  362. data/spec/spam_corpus/48.txt.gz +0 -0
  363. data/spec/spam_corpus/480.txt.gz +0 -0
  364. data/spec/spam_corpus/482.txt.gz +0 -0
  365. data/spec/spam_corpus/484.txt.gz +0 -0
  366. data/spec/spam_corpus/486.txt.gz +0 -0
  367. data/spec/spam_corpus/488.txt.gz +0 -0
  368. data/spec/spam_corpus/490.txt.gz +0 -0
  369. data/spec/spam_corpus/492.txt.gz +0 -0
  370. data/spec/spam_corpus/494.txt.gz +0 -0
  371. data/spec/spam_corpus/496.txt.gz +0 -0
  372. data/spec/spam_corpus/498.txt.gz +0 -0
  373. data/spec/spam_corpus/5.txt.gz +0 -0
  374. data/spec/spam_corpus/50.txt.gz +0 -0
  375. data/spec/spam_corpus/500.txt.gz +0 -0
  376. data/spec/spam_corpus/502.txt.gz +0 -0
  377. data/spec/spam_corpus/504.txt.gz +0 -0
  378. data/spec/spam_corpus/506.txt.gz +0 -0
  379. data/spec/spam_corpus/508.txt.gz +0 -0
  380. data/spec/spam_corpus/510.txt.gz +0 -0
  381. data/spec/spam_corpus/512.txt.gz +0 -0
  382. data/spec/spam_corpus/514.txt.gz +0 -0
  383. data/spec/spam_corpus/516.txt.gz +0 -0
  384. data/spec/spam_corpus/518.txt.gz +0 -0
  385. data/spec/spam_corpus/52.txt.gz +0 -0
  386. data/spec/spam_corpus/520.txt.gz +0 -0
  387. data/spec/spam_corpus/522.txt.gz +0 -0
  388. data/spec/spam_corpus/524.txt.gz +0 -0
  389. data/spec/spam_corpus/526.txt.gz +0 -0
  390. data/spec/spam_corpus/528.txt.gz +0 -0
  391. data/spec/spam_corpus/530.txt.gz +0 -0
  392. data/spec/spam_corpus/532.txt.gz +0 -0
  393. data/spec/spam_corpus/534.txt.gz +0 -0
  394. data/spec/spam_corpus/536.txt.gz +0 -0
  395. data/spec/spam_corpus/538.txt.gz +0 -0
  396. data/spec/spam_corpus/54.txt.gz +0 -0
  397. data/spec/spam_corpus/540.txt.gz +0 -0
  398. data/spec/spam_corpus/542.txt.gz +0 -0
  399. data/spec/spam_corpus/544.txt.gz +0 -0
  400. data/spec/spam_corpus/546.txt.gz +0 -0
  401. data/spec/spam_corpus/548.txt.gz +0 -0
  402. data/spec/spam_corpus/550.txt.gz +0 -0
  403. data/spec/spam_corpus/552.txt.gz +0 -0
  404. data/spec/spam_corpus/554.txt.gz +0 -0
  405. data/spec/spam_corpus/556.txt.gz +0 -0
  406. data/spec/spam_corpus/558.txt.gz +0 -0
  407. data/spec/spam_corpus/56.txt.gz +0 -0
  408. data/spec/spam_corpus/560.txt.gz +0 -0
  409. data/spec/spam_corpus/562.txt.gz +0 -0
  410. data/spec/spam_corpus/564.txt.gz +0 -0
  411. data/spec/spam_corpus/566.txt.gz +0 -0
  412. data/spec/spam_corpus/568.txt.gz +0 -0
  413. data/spec/spam_corpus/570.txt.gz +0 -0
  414. data/spec/spam_corpus/572.txt.gz +0 -0
  415. data/spec/spam_corpus/574.txt.gz +0 -0
  416. data/spec/spam_corpus/576.txt.gz +0 -0
  417. data/spec/spam_corpus/578.txt.gz +0 -0
  418. data/spec/spam_corpus/58.txt.gz +0 -0
  419. data/spec/spam_corpus/580.txt.gz +0 -0
  420. data/spec/spam_corpus/582.txt.gz +0 -0
  421. data/spec/spam_corpus/584.txt.gz +0 -0
  422. data/spec/spam_corpus/586.txt.gz +0 -0
  423. data/spec/spam_corpus/588.txt.gz +0 -0
  424. data/spec/spam_corpus/590.txt.gz +0 -0
  425. data/spec/spam_corpus/592.txt.gz +0 -0
  426. data/spec/spam_corpus/594.txt.gz +0 -0
  427. data/spec/spam_corpus/596.txt.gz +0 -0
  428. data/spec/spam_corpus/598.txt.gz +0 -0
  429. data/spec/spam_corpus/6.txt.gz +0 -0
  430. data/spec/spam_corpus/60.txt.gz +0 -0
  431. data/spec/spam_corpus/600.txt.gz +0 -0
  432. data/spec/spam_corpus/602.txt.gz +0 -0
  433. data/spec/spam_corpus/604.txt.gz +0 -0
  434. data/spec/spam_corpus/606.txt.gz +0 -0
  435. data/spec/spam_corpus/608.txt.gz +0 -0
  436. data/spec/spam_corpus/610.txt.gz +0 -0
  437. data/spec/spam_corpus/612.txt.gz +0 -0
  438. data/spec/spam_corpus/614.txt.gz +0 -0
  439. data/spec/spam_corpus/616.txt.gz +0 -0
  440. data/spec/spam_corpus/618.txt.gz +0 -0
  441. data/spec/spam_corpus/62.txt.gz +0 -0
  442. data/spec/spam_corpus/620.txt.gz +0 -0
  443. data/spec/spam_corpus/622.txt.gz +0 -0
  444. data/spec/spam_corpus/624.txt.gz +0 -0
  445. data/spec/spam_corpus/626.txt.gz +0 -0
  446. data/spec/spam_corpus/628.txt.gz +0 -0
  447. data/spec/spam_corpus/630.txt.gz +0 -0
  448. data/spec/spam_corpus/632.txt.gz +0 -0
  449. data/spec/spam_corpus/634.txt.gz +0 -0
  450. data/spec/spam_corpus/636.txt.gz +0 -0
  451. data/spec/spam_corpus/638.txt.gz +0 -0
  452. data/spec/spam_corpus/64.txt.gz +0 -0
  453. data/spec/spam_corpus/640.txt.gz +0 -0
  454. data/spec/spam_corpus/642.txt.gz +0 -0
  455. data/spec/spam_corpus/644.txt.gz +0 -0
  456. data/spec/spam_corpus/646.txt.gz +0 -0
  457. data/spec/spam_corpus/648.txt.gz +0 -0
  458. data/spec/spam_corpus/650.txt.gz +0 -0
  459. data/spec/spam_corpus/652.txt.gz +0 -0
  460. data/spec/spam_corpus/654.txt.gz +0 -0
  461. data/spec/spam_corpus/656.txt.gz +0 -0
  462. data/spec/spam_corpus/658.txt.gz +0 -0
  463. data/spec/spam_corpus/66.txt.gz +0 -0
  464. data/spec/spam_corpus/660.txt.gz +0 -0
  465. data/spec/spam_corpus/662.txt.gz +0 -0
  466. data/spec/spam_corpus/664.txt.gz +0 -0
  467. data/spec/spam_corpus/666.txt.gz +0 -0
  468. data/spec/spam_corpus/668.txt.gz +0 -0
  469. data/spec/spam_corpus/670.txt.gz +0 -0
  470. data/spec/spam_corpus/672.txt.gz +0 -0
  471. data/spec/spam_corpus/674.txt.gz +0 -0
  472. data/spec/spam_corpus/676.txt.gz +0 -0
  473. data/spec/spam_corpus/678.txt.gz +0 -0
  474. data/spec/spam_corpus/68.txt.gz +0 -0
  475. data/spec/spam_corpus/680.txt.gz +0 -0
  476. data/spec/spam_corpus/682.txt.gz +0 -0
  477. data/spec/spam_corpus/684.txt.gz +0 -0
  478. data/spec/spam_corpus/686.txt.gz +0 -0
  479. data/spec/spam_corpus/688.txt.gz +0 -0
  480. data/spec/spam_corpus/690.txt.gz +0 -0
  481. data/spec/spam_corpus/692.txt.gz +0 -0
  482. data/spec/spam_corpus/694.txt.gz +0 -0
  483. data/spec/spam_corpus/696.txt.gz +0 -0
  484. data/spec/spam_corpus/698.txt.gz +0 -0
  485. data/spec/spam_corpus/7.txt.gz +0 -0
  486. data/spec/spam_corpus/70.txt.gz +0 -0
  487. data/spec/spam_corpus/700.txt.gz +0 -0
  488. data/spec/spam_corpus/702.txt.gz +0 -0
  489. data/spec/spam_corpus/704.txt.gz +0 -0
  490. data/spec/spam_corpus/706.txt.gz +0 -0
  491. data/spec/spam_corpus/708.txt.gz +0 -0
  492. data/spec/spam_corpus/710.txt.gz +0 -0
  493. data/spec/spam_corpus/712.txt.gz +0 -0
  494. data/spec/spam_corpus/714.txt.gz +0 -0
  495. data/spec/spam_corpus/716.txt.gz +0 -0
  496. data/spec/spam_corpus/718.txt.gz +0 -0
  497. data/spec/spam_corpus/72.txt.gz +0 -0
  498. data/spec/spam_corpus/720.txt.gz +0 -0
  499. data/spec/spam_corpus/722.txt.gz +0 -0
  500. data/spec/spam_corpus/724.txt.gz +0 -0
  501. data/spec/spam_corpus/726.txt.gz +0 -0
  502. data/spec/spam_corpus/728.txt.gz +0 -0
  503. data/spec/spam_corpus/730.txt.gz +0 -0
  504. data/spec/spam_corpus/732.txt.gz +0 -0
  505. data/spec/spam_corpus/734.txt.gz +0 -0
  506. data/spec/spam_corpus/736.txt.gz +0 -0
  507. data/spec/spam_corpus/738.txt.gz +0 -0
  508. data/spec/spam_corpus/74.txt.gz +0 -0
  509. data/spec/spam_corpus/740.txt.gz +0 -0
  510. data/spec/spam_corpus/742.txt.gz +0 -0
  511. data/spec/spam_corpus/744.txt.gz +0 -0
  512. data/spec/spam_corpus/746.txt.gz +0 -0
  513. data/spec/spam_corpus/748.txt.gz +0 -0
  514. data/spec/spam_corpus/750.txt.gz +0 -0
  515. data/spec/spam_corpus/752.txt.gz +0 -0
  516. data/spec/spam_corpus/754.txt.gz +0 -0
  517. data/spec/spam_corpus/756.txt.gz +0 -0
  518. data/spec/spam_corpus/758.txt.gz +0 -0
  519. data/spec/spam_corpus/76.txt.gz +0 -0
  520. data/spec/spam_corpus/760.txt.gz +0 -0
  521. data/spec/spam_corpus/762.txt.gz +0 -0
  522. data/spec/spam_corpus/764.txt.gz +0 -0
  523. data/spec/spam_corpus/766.txt.gz +0 -0
  524. data/spec/spam_corpus/768.txt.gz +0 -0
  525. data/spec/spam_corpus/770.txt.gz +0 -0
  526. data/spec/spam_corpus/772.txt.gz +0 -0
  527. data/spec/spam_corpus/774.txt.gz +0 -0
  528. data/spec/spam_corpus/776.txt.gz +0 -0
  529. data/spec/spam_corpus/778.txt.gz +0 -0
  530. data/spec/spam_corpus/78.txt.gz +0 -0
  531. data/spec/spam_corpus/780.txt.gz +0 -0
  532. data/spec/spam_corpus/782.txt.gz +0 -0
  533. data/spec/spam_corpus/784.txt.gz +0 -0
  534. data/spec/spam_corpus/786.txt.gz +0 -0
  535. data/spec/spam_corpus/788.txt.gz +0 -0
  536. data/spec/spam_corpus/790.txt.gz +0 -0
  537. data/spec/spam_corpus/792.txt.gz +0 -0
  538. data/spec/spam_corpus/794.txt.gz +0 -0
  539. data/spec/spam_corpus/796.txt.gz +0 -0
  540. data/spec/spam_corpus/798.txt.gz +0 -0
  541. data/spec/spam_corpus/8.txt.gz +0 -0
  542. data/spec/spam_corpus/80.txt.gz +0 -0
  543. data/spec/spam_corpus/800.txt.gz +0 -0
  544. data/spec/spam_corpus/802.txt.gz +0 -0
  545. data/spec/spam_corpus/804.txt.gz +0 -0
  546. data/spec/spam_corpus/806.txt.gz +0 -0
  547. data/spec/spam_corpus/808.txt.gz +0 -0
  548. data/spec/spam_corpus/810.txt.gz +0 -0
  549. data/spec/spam_corpus/812.txt.gz +0 -0
  550. data/spec/spam_corpus/814.txt.gz +0 -0
  551. data/spec/spam_corpus/816.txt.gz +0 -0
  552. data/spec/spam_corpus/818.txt.gz +0 -0
  553. data/spec/spam_corpus/82.txt.gz +0 -0
  554. data/spec/spam_corpus/820.txt.gz +0 -0
  555. data/spec/spam_corpus/822.txt.gz +0 -0
  556. data/spec/spam_corpus/824.txt.gz +0 -0
  557. data/spec/spam_corpus/826.txt.gz +0 -0
  558. data/spec/spam_corpus/828.txt.gz +0 -0
  559. data/spec/spam_corpus/830.txt.gz +0 -0
  560. data/spec/spam_corpus/832.txt.gz +0 -0
  561. data/spec/spam_corpus/834.txt.gz +0 -0
  562. data/spec/spam_corpus/836.txt.gz +0 -0
  563. data/spec/spam_corpus/838.txt.gz +0 -0
  564. data/spec/spam_corpus/84.txt.gz +0 -0
  565. data/spec/spam_corpus/840.txt.gz +0 -0
  566. data/spec/spam_corpus/842.txt.gz +0 -0
  567. data/spec/spam_corpus/844.txt.gz +0 -0
  568. data/spec/spam_corpus/846.txt.gz +0 -0
  569. data/spec/spam_corpus/848.txt.gz +0 -0
  570. data/spec/spam_corpus/850.txt.gz +0 -0
  571. data/spec/spam_corpus/852.txt.gz +0 -0
  572. data/spec/spam_corpus/854.txt.gz +0 -0
  573. data/spec/spam_corpus/856.txt.gz +0 -0
  574. data/spec/spam_corpus/858.txt.gz +0 -0
  575. data/spec/spam_corpus/86.txt.gz +0 -0
  576. data/spec/spam_corpus/860.txt.gz +0 -0
  577. data/spec/spam_corpus/862.txt.gz +0 -0
  578. data/spec/spam_corpus/864.txt.gz +0 -0
  579. data/spec/spam_corpus/866.txt.gz +0 -0
  580. data/spec/spam_corpus/868.txt.gz +0 -0
  581. data/spec/spam_corpus/870.txt.gz +0 -0
  582. data/spec/spam_corpus/872.txt.gz +0 -0
  583. data/spec/spam_corpus/874.txt.gz +0 -0
  584. data/spec/spam_corpus/876.txt.gz +0 -0
  585. data/spec/spam_corpus/878.txt.gz +0 -0
  586. data/spec/spam_corpus/88.txt.gz +0 -0
  587. data/spec/spam_corpus/880.txt.gz +0 -0
  588. data/spec/spam_corpus/882.txt.gz +0 -0
  589. data/spec/spam_corpus/884.txt.gz +0 -0
  590. data/spec/spam_corpus/886.txt.gz +0 -0
  591. data/spec/spam_corpus/888.txt.gz +0 -0
  592. data/spec/spam_corpus/890.txt.gz +0 -0
  593. data/spec/spam_corpus/892.txt.gz +0 -0
  594. data/spec/spam_corpus/894.txt.gz +0 -0
  595. data/spec/spam_corpus/896.txt.gz +0 -0
  596. data/spec/spam_corpus/898.txt.gz +0 -0
  597. data/spec/spam_corpus/9.txt.gz +0 -0
  598. data/spec/spam_corpus/90.txt.gz +0 -0
  599. data/spec/spam_corpus/900.txt.gz +0 -0
  600. data/spec/spam_corpus/902.txt.gz +0 -0
  601. data/spec/spam_corpus/904.txt.gz +0 -0
  602. data/spec/spam_corpus/906.txt.gz +0 -0
  603. data/spec/spam_corpus/908.txt.gz +0 -0
  604. data/spec/spam_corpus/910.txt.gz +0 -0
  605. data/spec/spam_corpus/912.txt.gz +0 -0
  606. data/spec/spam_corpus/914.txt.gz +0 -0
  607. data/spec/spam_corpus/916.txt.gz +0 -0
  608. data/spec/spam_corpus/918.txt.gz +0 -0
  609. data/spec/spam_corpus/92.txt.gz +0 -0
  610. data/spec/spam_corpus/920.txt.gz +0 -0
  611. data/spec/spam_corpus/922.txt.gz +0 -0
  612. data/spec/spam_corpus/924.txt.gz +0 -0
  613. data/spec/spam_corpus/926.txt.gz +0 -0
  614. data/spec/spam_corpus/928.txt.gz +0 -0
  615. data/spec/spam_corpus/930.txt.gz +0 -0
  616. data/spec/spam_corpus/932.txt.gz +0 -0
  617. data/spec/spam_corpus/934.txt.gz +0 -0
  618. data/spec/spam_corpus/936.txt.gz +0 -0
  619. data/spec/spam_corpus/938.txt.gz +0 -0
  620. data/spec/spam_corpus/94.txt.gz +0 -0
  621. data/spec/spam_corpus/940.txt.gz +0 -0
  622. data/spec/spam_corpus/942.txt.gz +0 -0
  623. data/spec/spam_corpus/944.txt.gz +0 -0
  624. data/spec/spam_corpus/946.txt.gz +0 -0
  625. data/spec/spam_corpus/948.txt.gz +0 -0
  626. data/spec/spam_corpus/950.txt.gz +0 -0
  627. data/spec/spam_corpus/952.txt.gz +0 -0
  628. data/spec/spam_corpus/954.txt.gz +0 -0
  629. data/spec/spam_corpus/956.txt.gz +0 -0
  630. data/spec/spam_corpus/958.txt.gz +0 -0
  631. data/spec/spam_corpus/96.txt.gz +0 -0
  632. data/spec/spam_corpus/960.txt.gz +0 -0
  633. data/spec/spam_corpus/962.txt.gz +0 -0
  634. data/spec/spam_corpus/964.txt.gz +0 -0
  635. data/spec/spam_corpus/966.txt.gz +0 -0
  636. data/spec/spam_corpus/968.txt.gz +0 -0
  637. data/spec/spam_corpus/970.txt.gz +0 -0
  638. data/spec/spam_corpus/972.txt.gz +0 -0
  639. data/spec/spam_corpus/974.txt.gz +0 -0
  640. data/spec/spam_corpus/98.txt.gz +0 -0
  641. data/spec/spam_corpus/debugyouradd.com.txt.gz +0 -0
  642. data/spec/spam_corpus/humandesignconsulting.comm.txt.gz +0 -0
  643. data/spec/spam_corpus_spec.rb +0 -11
  644. data/spec/spec.opts +0 -1
  645. data/spec/spec_helper.rb +0 -16
  646. data/tasks/rspec.rake +0 -21
@@ -1,39 +1,11 @@
1
- require File.dirname(__FILE__) + '/../spec_helper.rb'
1
+ describe DespamilatorFilter::NaughtyWords do
2
2
 
3
- describe "NaughtyWords" do
4
- it "should return a score for 1 naughty word" do
5
- dspam = Despamilator.new('viagra')
6
- dspam.score.should == 0.1
7
- end
3
+ the_name_should_be 'Naughty Words'
4
+ the_description_should_be 'Detects cheeky words'
8
5
 
9
- describe 'attributes' do
10
- before :each do
11
- @filter = Despamilator.new('bondage').matched_by.first
12
- end
6
+ despamilator_should_apply_the_filter_for('bondage')
13
7
 
14
- it "should have a filename" do
15
- @filter.filename.should == 'naughty_words.rb'
16
- end
8
+ a_single_match_of('bondage', should_score: 0.1)
9
+ a_multiple_match_of('viagra penis', should_score: [0.2, 2.times])
17
10
 
18
- it "should have a name" do
19
- @filter.name.should == 'Naughty Words'
20
- end
21
-
22
- it "should have a description" do
23
- @filter.description.should == 'Detects cheeky words'
24
- end
25
-
26
- it "should have a number of matches" do
27
- @filter.matches.should == 1
28
- end
29
-
30
- it "should have a score" do
31
- @filter.score.should == 0.1
32
- end
33
- end
34
-
35
- it "should score more for 3 naughty words" do
36
- dspam = Despamilator.new('bondage viagra penis')
37
- dspam.score.to_s.should == 0.3.to_s
38
- end
39
11
  end
@@ -1,59 +1,34 @@
1
- require File.dirname(__FILE__) + '/../spec_helper.rb'
1
+ describe DespamilatorFilter::NumbersAndWords do
2
2
 
3
- describe "NumbersAndWords" do
4
- [1, 4, 10, 100000, '1,000,000', '1st', '2nd', '3rd', '4th', '5th', '6th', '10th', '122nd'].each do |number|
5
- it "should return a blank for a #{number}" do
6
- dspam = Despamilator.new(number)
7
- dspam.score.should == 0
8
- end
9
- end
10
-
11
- ['wanga x5 mool', '4ghk', 'XTHL9'].each do |string|
12
- it "should detect suspicious number word combos such as #{string}" do
13
- dspam = Despamilator.new(string)
14
- dspam.score.should == 0.1
15
- end
16
- end
3
+ the_name_should_be 'Numbers next to words'
4
+ the_description_should_be 'Detects unusual number/word combinations'
17
5
 
18
- ['4wanga x5 mool', '4g6hk', 'XT7HL9', '77th8nd'].each do |string|
19
- it "should detect multiple suspicious number word combos such as #{string}" do
20
- dspam = Despamilator.new(string)
21
- dspam.score.should == 0.2
22
- end
23
- end
6
+ despamilator_should_apply_the_filter_for('X5T')
24
7
 
25
- [1, 2, 3, 4, 5, 6].each do |tag_no|
26
- header_tag = "h#{tag_no}"
8
+ a_single_match_of('X5T', should_score: 0.1)
9
+ a_multiple_match_of('4g6hk', should_score: [0.2, 2.times])
27
10
 
28
- it "should ignore html header tag #{header_tag}" do
29
- dspam = Despamilator.new(header_tag)
30
- dspam.score.should == 0
11
+ describe 'exceptions' do
12
+ before :all do
13
+ @filter = DespamilatorFilter::NumbersAndWords.new
31
14
  end
32
- end
33
15
 
34
- describe 'attributes' do
35
- before :each do
36
- @filter = Despamilator.new('X5T').matched_by.first
16
+ [1, 4, 10, 100000, '1,000,000', '1st', '2nd', '3rd', '4th', '5th', '6th', '10th', '122nd'].each do |number|
17
+ it "should return a blank for a #{number}" do
18
+ @filter.parse(number.to_s)
19
+ @filter.score.should == 0
20
+ end
37
21
  end
38
22
 
39
- it "should have a filename" do
40
- @filter.filename.should == 'numbers_and_words.rb'
41
- end
42
-
43
- it "should have a name" do
44
- @filter.name.should == 'Numbers next to words'
45
- end
23
+ [1, 2, 3, 4, 5, 6].each do |tag_no|
24
+ header_tag = "h#{tag_no}"
46
25
 
47
- it "should have a description" do
48
- @filter.description.should == 'Detects unusual number/word combinations'
26
+ it "should ignore html header tag #{header_tag}" do
27
+ @filter.parse(header_tag)
28
+ @filter.score.should == 0
29
+ end
49
30
  end
50
31
 
51
- it "should have a number of matches" do
52
- @filter.matches.should == 1
53
- end
54
-
55
- it "should have a score" do
56
- @filter.score.should == 0.1
57
- end
58
32
  end
33
+
59
34
  end
@@ -1,6 +1,13 @@
1
- require File.dirname(__FILE__) + '/../spec_helper.rb'
1
+ describe DespamilatorFilter::ScriptTag do
2
+
3
+ the_name_should_be 'Script tag'
4
+ the_description_should_be 'Searches for variations for the HTML script tag'
5
+
6
+ despamilator_should_apply_the_filter_for('<script>')
7
+
8
+ a_single_match_of('<script>', should_score: 1)
9
+ a_multiple_match_of('<script></script> <script></script>', should_score: [1, 1.times])
2
10
 
3
- context "ScriptTag" do
4
11
  describe "detecting various script tags" do
5
12
  ['<script type="whatever">', '<script></script>', '</script>', '<script>', "<script\n>"].each do |script_tag|
6
13
  [script_tag.upcase, script_tag.downcase].each do |script_tag|
@@ -12,21 +19,4 @@ context "ScriptTag" do
12
19
  end
13
20
  end
14
21
 
15
- describe 'attributes' do
16
- before :all do
17
- @dspam = Despamilator.new('<script>').matched_by.first
18
- end
19
-
20
- it "should have a name" do
21
- @dspam.name.should == 'Detects script tags in text'
22
- end
23
-
24
- it "should have a description" do
25
- @dspam.description.should == 'Searches for variations for the HTML script tag'
26
- end
27
-
28
- it "should have a filename" do
29
- @dspam.filename.should == 'script_tag.rb'
30
- end
31
- end
32
- end
22
+ end
@@ -1,49 +1,44 @@
1
- require File.dirname(__FILE__) + '/../spec_helper.rb'
1
+ describe DespamilatorFilter::Shouting do
2
2
 
3
- describe "Shouting" do
4
- describe 'attributes' do
5
- before :each do
6
- @filter = Despamilator.new('HELLO THERE!! THIS IS SHOUTING!!').matched_by.first
7
- end
3
+ the_name_should_be 'Shouting'
4
+ the_description_should_be 'Detects and scores shouting (all caps)'
8
5
 
9
- it "should have a filename" do
10
- @filter.filename.should == 'shouting.rb'
11
- end
6
+ despamilator_should_apply_the_filter_for('this lil string is 50 PERCENT SHOUTING')
12
7
 
13
- it "should have a name" do
14
- @filter.name.should == 'Shouting'
15
- end
8
+ a_single_match_of('this lil string is 50 PERCENT SHOUTING', should_score: 0.25)
9
+ a_multiple_match_of('HELLO THERE!! THIS IS SHOUTING!!', should_score: [0.5, 1.times])
16
10
 
17
- it "should have a description" do
18
- @filter.description.should == 'Detects and scores shouting (all caps)'
19
- end
11
+ describe "exceptions" do
20
12
 
21
- it "should have a number of matches" do
22
- @filter.matches.should == 1
13
+ before :all do
14
+ @filter = DespamilatorFilter::Shouting.new
23
15
  end
24
16
 
25
- it "should have a score" do
26
- @filter.score.should == 0.5
27
- end
28
- end
29
-
30
- describe "filter" do
31
17
  it "should strip out HTML" do
32
- Despamilator.new('<H1>this is a flipping html tag whose contents is very long</h1>').matched_by.select {|a| a.name == 'Shouting'}.should be_empty
18
+ @filter.parse('<H1>this is a flipping html tag whose contents is very long</h1>')
19
+ @filter.score.should == 0
33
20
  end
34
21
 
35
22
  it "should ignore strings less than 20 characters long" do
36
- Despamilator.new('ABCD EFG HIJKLM NOP').matched_by.select {|a| a.name == 'Shouting'}.should be_empty
23
+ @filter.parse('ABCD EFG HIJKLM NOP')
24
+ @filter.score.should == 0
37
25
  end
38
26
 
39
- it "should score based on a percentage of uppercase words" do
40
- [
41
- ['this is a lowercased string', 0],
42
- ['this lil string is 50 PERCENT SHOUTING', 0.25],
43
- ['THIS LIL STRING IS 100 PERCENT SHOUTING', 0.5]
44
- ].each do |string, expected_score|
45
- Despamilator.new(string).score.should == expected_score
46
- end
27
+ end
28
+
29
+ [
30
+ ['this is a lowercased string', 0],
31
+ ['this lil string is 50 PERCENT SHOUTING', 0.25],
32
+ ['THIS LIL STRING IS 100 PERCENT SHOUTING', 0.5]
33
+ ].each do |string, expected_score|
34
+
35
+ it "should score the string '#{string}' based on a percentage of uppercase words" do
36
+ filter = DespamilatorFilter::Shouting.new
37
+
38
+ filter.parse(string)
39
+ filter.score.should == expected_score
47
40
  end
41
+
48
42
  end
43
+
49
44
  end
@@ -1,35 +1,11 @@
1
- require File.dirname(__FILE__) + '/../spec_helper.rb'
1
+ describe DespamilatorFilter::SquareBrackets do
2
2
 
3
- describe "SquareBrackets" do
4
- it "should return a score for 1 square bracket" do
5
- dspam = Despamilator.new('[')
6
- dspam.score.should == 0.05
7
- end
3
+ the_name_should_be 'Square Brackets'
4
+ the_description_should_be 'Detects each square bracket in a string'
8
5
 
9
- describe 'attributes' do
10
- before :each do
11
- @filter = Despamilator.new('[]').matched_by.first
12
- end
6
+ despamilator_should_apply_the_filter_for('[')
13
7
 
14
- it "should have a filename" do
15
- @filter.filename.should == 'square_brackets.rb'
16
- end
17
-
18
- it "should have a name" do
19
- @filter.name.should == 'Square Brackets'
20
- end
21
-
22
- it "should have a description" do
23
- @filter.description.should == 'Detects each square bracket in a string'
24
- end
25
-
26
- it "should have a number of matches" do
27
- @filter.matches.should == 2
28
- end
29
-
30
- it "should have a score" do
31
- @filter.score.should == 0.1
32
- end
33
- end
8
+ a_single_match_of('[', should_score: 0.05)
9
+ a_multiple_match_of('[]', should_score: [0.1, 2.times])
34
10
 
35
11
  end
@@ -1,39 +1,11 @@
1
- require File.dirname(__FILE__) + '/../spec_helper.rb'
1
+ describe DespamilatorFilter::URLs do
2
2
 
3
- describe "Url" do
4
- it "should return a score for 1 url" do
5
- dspam = Despamilator.new('http://www.blah.com')
6
- dspam.score.should == 0.2
7
- end
3
+ the_name_should_be 'URLs'
4
+ the_description_should_be 'Detects each url in a string'
8
5
 
9
- describe 'attributes' do
10
- before :each do
11
- @filter = Despamilator.new('http://www.blah.com').matched_by.first
12
- end
6
+ despamilator_should_apply_the_filter_for('zt')
13
7
 
14
- it "should have a filename" do
15
- @filter.filename.should == 'urls.rb'
16
- end
8
+ a_single_match_of('http://www.blah.com', should_score: 0.2)
9
+ a_multiple_match_of('http://www.blah.com http://www.poop.com', should_score: [0.4, 2.times])
17
10
 
18
- it "should have a name" do
19
- @filter.name.should == 'URLs'
20
- end
21
-
22
- it "should have a description" do
23
- @filter.description.should == 'Detects each url in a string'
24
- end
25
-
26
- it "should have a number of matches" do
27
- @filter.matches.should == 1
28
- end
29
-
30
- it "should have a score" do
31
- @filter.score.should == 0.2
32
- end
33
- end
34
-
35
- it "should score more for 2 misplaced urls" do
36
- dspam = Despamilator.new('http://www.blah.com http://www.poop.com')
37
- dspam.score.to_s.should == 0.4.to_s
38
- end
39
11
  end
@@ -0,0 +1,5 @@
1
+ require 'zlib'
2
+
3
+ def unzip_file filename
4
+ Zlib::GzipReader.open(filename).read
5
+ end
@@ -0,0 +1,59 @@
1
+ def the_name_should_be expected_name
2
+ it "should have a name" do
3
+ described_class.new.name.should == expected_name
4
+ end
5
+ end
6
+
7
+ def the_description_should_be expected_description
8
+ it "should have a description" do
9
+ described_class.new.description.should == expected_description
10
+ end
11
+ end
12
+
13
+ def a_single_match_of string, expectation
14
+ describe 'detecting a single match' do
15
+
16
+ before :all do
17
+ @filter = described_class.new
18
+ @filter.parse(string)
19
+ end
20
+
21
+ it "should only match once" do
22
+ @filter.matches.should == 1
23
+ end
24
+
25
+ it "should have a score" do
26
+ @filter.score.should == expectation[:should_score]
27
+ end
28
+
29
+ end
30
+ end
31
+
32
+ def a_multiple_match_of string, expectation
33
+ describe 'detecting a multiple matches' do
34
+
35
+ before :all do
36
+ @filter = described_class.new
37
+ @filter.parse(string)
38
+ end
39
+
40
+ it "should match many times" do
41
+ @filter.matches.should == expectation[:should_score].last.count
42
+ end
43
+
44
+ it "should have a score" do
45
+ @filter.score.should == expectation[:should_score].first
46
+ end
47
+
48
+ end
49
+ end
50
+
51
+ def despamilator_should_apply_the_filter_for string
52
+
53
+ it "should be applied during filtering" do
54
+ filter_name = described_class.new.name
55
+ despamilator = Despamilator.new(string)
56
+ despamilator.matched_by.collect { |f| f.name == filter_name }.should_not be_empty
57
+ end
58
+
59
+ end
@@ -0,0 +1,6 @@
1
+ require 'one_hundred_percent_coverage' if ENV['WITH_COVERAGE'].to_i == 1
2
+ require File.join(File.dirname(__FILE__), '..', '..', 'lib', 'despamilator')
3
+
4
+ Dir.glob(File.join(File.dirname(__FILE__), '*.rb')).each do |file|
5
+ require file
6
+ end
data/tasks/test.rake ADDED
@@ -0,0 +1,6 @@
1
+ ENV['WITH_COVERAGE'] = '1'
2
+
3
+ desc "Run the spec tests with coverage"
4
+ task :test do
5
+ Rake::Task[:spec].invoke
6
+ end
metadata CHANGED
@@ -3,9 +3,9 @@ name: despamilator
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
+ - 1
6
7
  - 0
7
- - 8
8
- version: "0.8"
8
+ version: "1.0"
9
9
  platform: ruby
10
10
  authors:
11
11
  - Stephen Hardisty
@@ -13,13 +13,14 @@ autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
15
 
16
- date: 2010-09-25 00:00:00 +10:00
16
+ date: 2011-01-09 00:00:00 +11:00
17
17
  default_executable:
18
18
  dependencies:
19
19
  - !ruby/object:Gem::Dependency
20
20
  name: rubyforge
21
21
  prerelease: false
22
22
  requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
23
24
  requirements:
24
25
  - - ">="
25
26
  - !ruby/object:Gem::Version
@@ -34,14 +35,15 @@ dependencies:
34
35
  name: hoe
35
36
  prerelease: false
36
37
  requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
37
39
  requirements:
38
40
  - - ">="
39
41
  - !ruby/object:Gem::Version
40
42
  segments:
41
43
  - 2
42
- - 6
44
+ - 7
43
45
  - 0
44
- version: 2.6.0
46
+ version: 2.7.0
45
47
  type: :development
46
48
  version_requirements: *id002
47
49
  description: |-
@@ -59,6 +61,10 @@ extra_rdoc_files:
59
61
  - Manifest.txt
60
62
  - PostInstall.txt
61
63
  files:
64
+ - .rspec
65
+ - .rvmrc
66
+ - Gemfile
67
+ - Gemfile.lock
62
68
  - History.txt
63
69
  - Manifest.txt
64
70
  - PostInstall.txt
@@ -79,116 +85,9 @@ files:
79
85
  - lib/despamilator/filter/square_brackets.rb
80
86
  - lib/despamilator/filter/urls.rb
81
87
  - lib/despamilator/filter_base.rb
82
- - lib/despamilator/validation.rb
83
88
  - scripts/despamilator_score.rb
84
- - spec/clean_corpus/101.txt.gz
85
- - spec/clean_corpus/103.txt.gz
86
- - spec/clean_corpus/105.txt.gz
87
- - spec/clean_corpus/107.txt.gz
88
- - spec/clean_corpus/109.txt.gz
89
- - spec/clean_corpus/111.txt.gz
90
- - spec/clean_corpus/113.txt.gz
91
- - spec/clean_corpus/115.txt.gz
92
- - spec/clean_corpus/117.txt.gz
93
- - spec/clean_corpus/119.txt.gz
94
- - spec/clean_corpus/121.txt.gz
95
- - spec/clean_corpus/123.txt.gz
96
- - spec/clean_corpus/125.txt.gz
97
- - spec/clean_corpus/127.txt.gz
98
- - spec/clean_corpus/129.txt.gz
99
- - spec/clean_corpus/131.txt.gz
100
- - spec/clean_corpus/133.txt.gz
101
- - spec/clean_corpus/135.txt.gz
102
- - spec/clean_corpus/137.txt.gz
103
- - spec/clean_corpus/139.txt.gz
104
- - spec/clean_corpus/141.txt.gz
105
- - spec/clean_corpus/143.txt.gz
106
- - spec/clean_corpus/145.txt.gz
107
- - spec/clean_corpus/147.txt.gz
108
- - spec/clean_corpus/149.txt.gz
109
- - spec/clean_corpus/151.txt.gz
110
- - spec/clean_corpus/153.txt.gz
111
- - spec/clean_corpus/155.txt.gz
112
- - spec/clean_corpus/157.txt.gz
113
- - spec/clean_corpus/159.txt.gz
114
- - spec/clean_corpus/161.txt.gz
115
- - spec/clean_corpus/163.txt.gz
116
- - spec/clean_corpus/165.txt.gz
117
- - spec/clean_corpus/167.txt.gz
118
- - spec/clean_corpus/169.txt.gz
119
- - spec/clean_corpus/171.txt.gz
120
- - spec/clean_corpus/173.txt.gz
121
- - spec/clean_corpus/175.txt.gz
122
- - spec/clean_corpus/177.txt.gz
123
- - spec/clean_corpus/179.txt.gz
124
- - spec/clean_corpus/18.txt.gz
125
- - spec/clean_corpus/181.txt.gz
126
- - spec/clean_corpus/183.txt.gz
127
- - spec/clean_corpus/185.txt.gz
128
- - spec/clean_corpus/187.txt.gz
129
- - spec/clean_corpus/189.txt.gz
130
- - spec/clean_corpus/191.txt.gz
131
- - spec/clean_corpus/193.txt.gz
132
- - spec/clean_corpus/195.txt.gz
133
- - spec/clean_corpus/197.txt.gz
134
- - spec/clean_corpus/199.txt.gz
135
- - spec/clean_corpus/20.txt.gz
136
- - spec/clean_corpus/201.txt.gz
137
- - spec/clean_corpus/203.txt.gz
138
- - spec/clean_corpus/205.txt.gz
139
- - spec/clean_corpus/207.txt.gz
140
- - spec/clean_corpus/209.txt.gz
141
- - spec/clean_corpus/211.txt.gz
142
- - spec/clean_corpus/213.txt.gz
143
- - spec/clean_corpus/215.txt.gz
144
- - spec/clean_corpus/217.txt.gz
145
- - spec/clean_corpus/219.txt.gz
146
- - spec/clean_corpus/22.txt.gz
147
- - spec/clean_corpus/221.txt.gz
148
- - spec/clean_corpus/223.txt.gz
149
- - spec/clean_corpus/225.txt.gz
150
- - spec/clean_corpus/24.txt.gz
151
- - spec/clean_corpus/26.txt.gz
152
- - spec/clean_corpus/27.txt.gz
153
- - spec/clean_corpus/29.txt.gz
154
- - spec/clean_corpus/31.txt.gz
155
- - spec/clean_corpus/33.txt.gz
156
- - spec/clean_corpus/35.txt.gz
157
- - spec/clean_corpus/37.txt.gz
158
- - spec/clean_corpus/39.txt.gz
159
- - spec/clean_corpus/41.txt.gz
160
- - spec/clean_corpus/43.txt.gz
161
- - spec/clean_corpus/45.txt.gz
162
- - spec/clean_corpus/47.txt.gz
163
- - spec/clean_corpus/49.txt.gz
164
- - spec/clean_corpus/51.txt.gz
165
- - spec/clean_corpus/53.txt.gz
166
- - spec/clean_corpus/55.txt.gz
167
- - spec/clean_corpus/57.txt.gz
168
- - spec/clean_corpus/59.txt.gz
169
- - spec/clean_corpus/61.txt.gz
170
- - spec/clean_corpus/63.txt.gz
171
- - spec/clean_corpus/65.txt.gz
172
- - spec/clean_corpus/67.txt.gz
173
- - spec/clean_corpus/69.txt.gz
174
- - spec/clean_corpus/71.txt.gz
175
- - spec/clean_corpus/73.txt.gz
176
- - spec/clean_corpus/75.txt.gz
177
- - spec/clean_corpus/77.txt.gz
178
- - spec/clean_corpus/79.txt.gz
179
- - spec/clean_corpus/81.txt.gz
180
- - spec/clean_corpus/83.txt.gz
181
- - spec/clean_corpus/85.txt.gz
182
- - spec/clean_corpus/87.txt.gz
183
- - spec/clean_corpus/89.txt.gz
184
- - spec/clean_corpus/91.txt.gz
185
- - spec/clean_corpus/93.txt.gz
186
- - spec/clean_corpus/95.txt.gz
187
- - spec/clean_corpus/97.txt.gz
188
- - spec/clean_corpus/99.txt.gz
189
- - spec/clean_corpus_spec.rb
190
89
  - spec/despamilator_spec.rb
191
- - spec/despamilator_validation_spec.rb
90
+ - spec/filter_base_spec.rb
192
91
  - spec/filters/funky_consonant_spec.rb
193
92
  - spec/filters/html_tags_spec.rb
194
93
  - spec/filters/ip_address_url_spec.rb
@@ -200,503 +99,10 @@ files:
200
99
  - spec/filters/shouting_spec.rb
201
100
  - spec/filters/square_brackets_spec.rb
202
101
  - spec/filters/urls_spec.rb
203
- - spec/spam_corpus/0.txt.gz
204
- - spec/spam_corpus/1.txt.gz
205
- - spec/spam_corpus/10.txt.gz
206
- - spec/spam_corpus/100.txt.gz
207
- - spec/spam_corpus/102.txt.gz
208
- - spec/spam_corpus/104.txt.gz
209
- - spec/spam_corpus/106.txt.gz
210
- - spec/spam_corpus/108.txt.gz
211
- - spec/spam_corpus/11.txt.gz
212
- - spec/spam_corpus/110.txt.gz
213
- - spec/spam_corpus/112.txt.gz
214
- - spec/spam_corpus/114.txt.gz
215
- - spec/spam_corpus/116.txt.gz
216
- - spec/spam_corpus/118.txt.gz
217
- - spec/spam_corpus/12.txt.gz
218
- - spec/spam_corpus/120.txt.gz
219
- - spec/spam_corpus/122.txt.gz
220
- - spec/spam_corpus/124.txt.gz
221
- - spec/spam_corpus/126.txt.gz
222
- - spec/spam_corpus/128.txt.gz
223
- - spec/spam_corpus/13.txt.gz
224
- - spec/spam_corpus/130.txt.gz
225
- - spec/spam_corpus/132.txt.gz
226
- - spec/spam_corpus/134.txt.gz
227
- - spec/spam_corpus/136.txt.gz
228
- - spec/spam_corpus/138.txt.gz
229
- - spec/spam_corpus/14.txt.gz
230
- - spec/spam_corpus/140.txt.gz
231
- - spec/spam_corpus/142.txt.gz
232
- - spec/spam_corpus/144.txt.gz
233
- - spec/spam_corpus/146.txt.gz
234
- - spec/spam_corpus/148.txt.gz
235
- - spec/spam_corpus/15.txt.gz
236
- - spec/spam_corpus/150.txt.gz
237
- - spec/spam_corpus/152.txt.gz
238
- - spec/spam_corpus/154.txt.gz
239
- - spec/spam_corpus/156.txt.gz
240
- - spec/spam_corpus/158.txt.gz
241
- - spec/spam_corpus/16.txt.gz
242
- - spec/spam_corpus/160.txt.gz
243
- - spec/spam_corpus/162.txt.gz
244
- - spec/spam_corpus/164.txt.gz
245
- - spec/spam_corpus/166.txt.gz
246
- - spec/spam_corpus/168.txt.gz
247
- - spec/spam_corpus/170.txt.gz
248
- - spec/spam_corpus/172.txt.gz
249
- - spec/spam_corpus/174.txt.gz
250
- - spec/spam_corpus/176.txt.gz
251
- - spec/spam_corpus/178.txt.gz
252
- - spec/spam_corpus/180.txt.gz
253
- - spec/spam_corpus/182.txt.gz
254
- - spec/spam_corpus/184.txt.gz
255
- - spec/spam_corpus/186.txt.gz
256
- - spec/spam_corpus/188.txt.gz
257
- - spec/spam_corpus/190.txt.gz
258
- - spec/spam_corpus/192.txt.gz
259
- - spec/spam_corpus/194.txt.gz
260
- - spec/spam_corpus/196.txt.gz
261
- - spec/spam_corpus/198.txt.gz
262
- - spec/spam_corpus/2.txt.gz
263
- - spec/spam_corpus/200.txt.gz
264
- - spec/spam_corpus/202.txt.gz
265
- - spec/spam_corpus/204.txt.gz
266
- - spec/spam_corpus/206.txt.gz
267
- - spec/spam_corpus/208.txt.gz
268
- - spec/spam_corpus/210.txt.gz
269
- - spec/spam_corpus/212.txt.gz
270
- - spec/spam_corpus/214.txt.gz
271
- - spec/spam_corpus/216.txt.gz
272
- - spec/spam_corpus/218.txt.gz
273
- - spec/spam_corpus/220.txt.gz
274
- - spec/spam_corpus/222.txt.gz
275
- - spec/spam_corpus/224.txt.gz
276
- - spec/spam_corpus/226.txt.gz
277
- - spec/spam_corpus/228.txt.gz
278
- - spec/spam_corpus/230.txt.gz
279
- - spec/spam_corpus/232.txt.gz
280
- - spec/spam_corpus/234.txt.gz
281
- - spec/spam_corpus/236.txt.gz
282
- - spec/spam_corpus/238.txt.gz
283
- - spec/spam_corpus/240.txt.gz
284
- - spec/spam_corpus/242.txt.gz
285
- - spec/spam_corpus/244.txt.gz
286
- - spec/spam_corpus/246.txt.gz
287
- - spec/spam_corpus/248.txt.gz
288
- - spec/spam_corpus/250.txt.gz
289
- - spec/spam_corpus/252.txt.gz
290
- - spec/spam_corpus/254.txt.gz
291
- - spec/spam_corpus/256.txt.gz
292
- - spec/spam_corpus/258.txt.gz
293
- - spec/spam_corpus/260.txt.gz
294
- - spec/spam_corpus/262.txt.gz
295
- - spec/spam_corpus/264.txt.gz
296
- - spec/spam_corpus/266.txt.gz
297
- - spec/spam_corpus/268.txt.gz
298
- - spec/spam_corpus/270.txt.gz
299
- - spec/spam_corpus/272.txt.gz
300
- - spec/spam_corpus/274.txt.gz
301
- - spec/spam_corpus/276.txt.gz
302
- - spec/spam_corpus/278.txt.gz
303
- - spec/spam_corpus/28.txt.gz
304
- - spec/spam_corpus/280.txt.gz
305
- - spec/spam_corpus/282.txt.gz
306
- - spec/spam_corpus/284.txt.gz
307
- - spec/spam_corpus/286.txt.gz
308
- - spec/spam_corpus/288.txt.gz
309
- - spec/spam_corpus/290.txt.gz
310
- - spec/spam_corpus/292.txt.gz
311
- - spec/spam_corpus/294.txt.gz
312
- - spec/spam_corpus/296.txt.gz
313
- - spec/spam_corpus/298.txt.gz
314
- - spec/spam_corpus/3.txt.gz
315
- - spec/spam_corpus/30.txt.gz
316
- - spec/spam_corpus/300.txt.gz
317
- - spec/spam_corpus/302.txt.gz
318
- - spec/spam_corpus/304.txt.gz
319
- - spec/spam_corpus/306.txt.gz
320
- - spec/spam_corpus/308.txt.gz
321
- - spec/spam_corpus/310.txt.gz
322
- - spec/spam_corpus/312.txt.gz
323
- - spec/spam_corpus/314.txt.gz
324
- - spec/spam_corpus/316.txt.gz
325
- - spec/spam_corpus/318.txt.gz
326
- - spec/spam_corpus/32.txt.gz
327
- - spec/spam_corpus/320.txt.gz
328
- - spec/spam_corpus/322.txt.gz
329
- - spec/spam_corpus/324.txt.gz
330
- - spec/spam_corpus/326.txt.gz
331
- - spec/spam_corpus/328.txt.gz
332
- - spec/spam_corpus/330.txt.gz
333
- - spec/spam_corpus/332.txt.gz
334
- - spec/spam_corpus/334.txt.gz
335
- - spec/spam_corpus/336.txt.gz
336
- - spec/spam_corpus/338.txt.gz
337
- - spec/spam_corpus/34.txt.gz
338
- - spec/spam_corpus/340.txt.gz
339
- - spec/spam_corpus/342.txt.gz
340
- - spec/spam_corpus/344.txt.gz
341
- - spec/spam_corpus/346.txt.gz
342
- - spec/spam_corpus/348.txt.gz
343
- - spec/spam_corpus/350.txt.gz
344
- - spec/spam_corpus/352.txt.gz
345
- - spec/spam_corpus/354.txt.gz
346
- - spec/spam_corpus/356.txt.gz
347
- - spec/spam_corpus/358.txt.gz
348
- - spec/spam_corpus/36.txt.gz
349
- - spec/spam_corpus/360.txt.gz
350
- - spec/spam_corpus/362.txt.gz
351
- - spec/spam_corpus/364.txt.gz
352
- - spec/spam_corpus/366.txt.gz
353
- - spec/spam_corpus/368.txt.gz
354
- - spec/spam_corpus/370.txt.gz
355
- - spec/spam_corpus/372.txt.gz
356
- - spec/spam_corpus/374.txt.gz
357
- - spec/spam_corpus/376.txt.gz
358
- - spec/spam_corpus/378.txt.gz
359
- - spec/spam_corpus/38.txt.gz
360
- - spec/spam_corpus/380.txt.gz
361
- - spec/spam_corpus/382.txt.gz
362
- - spec/spam_corpus/384.txt.gz
363
- - spec/spam_corpus/386.txt.gz
364
- - spec/spam_corpus/388.txt.gz
365
- - spec/spam_corpus/390.txt.gz
366
- - spec/spam_corpus/392.txt.gz
367
- - spec/spam_corpus/394.txt.gz
368
- - spec/spam_corpus/396.txt.gz
369
- - spec/spam_corpus/398.txt.gz
370
- - spec/spam_corpus/4.txt.gz
371
- - spec/spam_corpus/40.txt.gz
372
- - spec/spam_corpus/400.txt.gz
373
- - spec/spam_corpus/402.txt.gz
374
- - spec/spam_corpus/404.txt.gz
375
- - spec/spam_corpus/406.txt.gz
376
- - spec/spam_corpus/408.txt.gz
377
- - spec/spam_corpus/410.txt.gz
378
- - spec/spam_corpus/412.txt.gz
379
- - spec/spam_corpus/414.txt.gz
380
- - spec/spam_corpus/416.txt.gz
381
- - spec/spam_corpus/418.txt.gz
382
- - spec/spam_corpus/42.txt.gz
383
- - spec/spam_corpus/420.txt.gz
384
- - spec/spam_corpus/422.txt.gz
385
- - spec/spam_corpus/424.txt.gz
386
- - spec/spam_corpus/426.txt.gz
387
- - spec/spam_corpus/428.txt.gz
388
- - spec/spam_corpus/430.txt.gz
389
- - spec/spam_corpus/432.txt.gz
390
- - spec/spam_corpus/434.txt.gz
391
- - spec/spam_corpus/436.txt.gz
392
- - spec/spam_corpus/438.txt.gz
393
- - spec/spam_corpus/44.txt.gz
394
- - spec/spam_corpus/440.txt.gz
395
- - spec/spam_corpus/442.txt.gz
396
- - spec/spam_corpus/444.txt.gz
397
- - spec/spam_corpus/446.txt.gz
398
- - spec/spam_corpus/448.txt.gz
399
- - spec/spam_corpus/450.txt.gz
400
- - spec/spam_corpus/452.txt.gz
401
- - spec/spam_corpus/454.txt.gz
402
- - spec/spam_corpus/456.txt.gz
403
- - spec/spam_corpus/458.txt.gz
404
- - spec/spam_corpus/46.txt.gz
405
- - spec/spam_corpus/460.txt.gz
406
- - spec/spam_corpus/462.txt.gz
407
- - spec/spam_corpus/464.txt.gz
408
- - spec/spam_corpus/466.txt.gz
409
- - spec/spam_corpus/468.txt.gz
410
- - spec/spam_corpus/470.txt.gz
411
- - spec/spam_corpus/472.txt.gz
412
- - spec/spam_corpus/474.txt.gz
413
- - spec/spam_corpus/476.txt.gz
414
- - spec/spam_corpus/478.txt.gz
415
- - spec/spam_corpus/48.txt.gz
416
- - spec/spam_corpus/480.txt.gz
417
- - spec/spam_corpus/482.txt.gz
418
- - spec/spam_corpus/484.txt.gz
419
- - spec/spam_corpus/486.txt.gz
420
- - spec/spam_corpus/488.txt.gz
421
- - spec/spam_corpus/490.txt.gz
422
- - spec/spam_corpus/492.txt.gz
423
- - spec/spam_corpus/494.txt.gz
424
- - spec/spam_corpus/496.txt.gz
425
- - spec/spam_corpus/498.txt.gz
426
- - spec/spam_corpus/5.txt.gz
427
- - spec/spam_corpus/50.txt.gz
428
- - spec/spam_corpus/500.txt.gz
429
- - spec/spam_corpus/502.txt.gz
430
- - spec/spam_corpus/504.txt.gz
431
- - spec/spam_corpus/506.txt.gz
432
- - spec/spam_corpus/508.txt.gz
433
- - spec/spam_corpus/510.txt.gz
434
- - spec/spam_corpus/512.txt.gz
435
- - spec/spam_corpus/514.txt.gz
436
- - spec/spam_corpus/516.txt.gz
437
- - spec/spam_corpus/518.txt.gz
438
- - spec/spam_corpus/52.txt.gz
439
- - spec/spam_corpus/520.txt.gz
440
- - spec/spam_corpus/522.txt.gz
441
- - spec/spam_corpus/524.txt.gz
442
- - spec/spam_corpus/526.txt.gz
443
- - spec/spam_corpus/528.txt.gz
444
- - spec/spam_corpus/530.txt.gz
445
- - spec/spam_corpus/532.txt.gz
446
- - spec/spam_corpus/534.txt.gz
447
- - spec/spam_corpus/536.txt.gz
448
- - spec/spam_corpus/538.txt.gz
449
- - spec/spam_corpus/54.txt.gz
450
- - spec/spam_corpus/540.txt.gz
451
- - spec/spam_corpus/542.txt.gz
452
- - spec/spam_corpus/544.txt.gz
453
- - spec/spam_corpus/546.txt.gz
454
- - spec/spam_corpus/548.txt.gz
455
- - spec/spam_corpus/550.txt.gz
456
- - spec/spam_corpus/552.txt.gz
457
- - spec/spam_corpus/554.txt.gz
458
- - spec/spam_corpus/556.txt.gz
459
- - spec/spam_corpus/558.txt.gz
460
- - spec/spam_corpus/56.txt.gz
461
- - spec/spam_corpus/560.txt.gz
462
- - spec/spam_corpus/562.txt.gz
463
- - spec/spam_corpus/564.txt.gz
464
- - spec/spam_corpus/566.txt.gz
465
- - spec/spam_corpus/568.txt.gz
466
- - spec/spam_corpus/570.txt.gz
467
- - spec/spam_corpus/572.txt.gz
468
- - spec/spam_corpus/574.txt.gz
469
- - spec/spam_corpus/576.txt.gz
470
- - spec/spam_corpus/578.txt.gz
471
- - spec/spam_corpus/58.txt.gz
472
- - spec/spam_corpus/580.txt.gz
473
- - spec/spam_corpus/582.txt.gz
474
- - spec/spam_corpus/584.txt.gz
475
- - spec/spam_corpus/586.txt.gz
476
- - spec/spam_corpus/588.txt.gz
477
- - spec/spam_corpus/590.txt.gz
478
- - spec/spam_corpus/592.txt.gz
479
- - spec/spam_corpus/594.txt.gz
480
- - spec/spam_corpus/596.txt.gz
481
- - spec/spam_corpus/598.txt.gz
482
- - spec/spam_corpus/6.txt.gz
483
- - spec/spam_corpus/60.txt.gz
484
- - spec/spam_corpus/600.txt.gz
485
- - spec/spam_corpus/602.txt.gz
486
- - spec/spam_corpus/604.txt.gz
487
- - spec/spam_corpus/606.txt.gz
488
- - spec/spam_corpus/608.txt.gz
489
- - spec/spam_corpus/610.txt.gz
490
- - spec/spam_corpus/612.txt.gz
491
- - spec/spam_corpus/614.txt.gz
492
- - spec/spam_corpus/616.txt.gz
493
- - spec/spam_corpus/618.txt.gz
494
- - spec/spam_corpus/62.txt.gz
495
- - spec/spam_corpus/620.txt.gz
496
- - spec/spam_corpus/622.txt.gz
497
- - spec/spam_corpus/624.txt.gz
498
- - spec/spam_corpus/626.txt.gz
499
- - spec/spam_corpus/628.txt.gz
500
- - spec/spam_corpus/630.txt.gz
501
- - spec/spam_corpus/632.txt.gz
502
- - spec/spam_corpus/634.txt.gz
503
- - spec/spam_corpus/636.txt.gz
504
- - spec/spam_corpus/638.txt.gz
505
- - spec/spam_corpus/64.txt.gz
506
- - spec/spam_corpus/640.txt.gz
507
- - spec/spam_corpus/642.txt.gz
508
- - spec/spam_corpus/644.txt.gz
509
- - spec/spam_corpus/646.txt.gz
510
- - spec/spam_corpus/648.txt.gz
511
- - spec/spam_corpus/650.txt.gz
512
- - spec/spam_corpus/652.txt.gz
513
- - spec/spam_corpus/654.txt.gz
514
- - spec/spam_corpus/656.txt.gz
515
- - spec/spam_corpus/658.txt.gz
516
- - spec/spam_corpus/66.txt.gz
517
- - spec/spam_corpus/660.txt.gz
518
- - spec/spam_corpus/662.txt.gz
519
- - spec/spam_corpus/664.txt.gz
520
- - spec/spam_corpus/666.txt.gz
521
- - spec/spam_corpus/668.txt.gz
522
- - spec/spam_corpus/670.txt.gz
523
- - spec/spam_corpus/672.txt.gz
524
- - spec/spam_corpus/674.txt.gz
525
- - spec/spam_corpus/676.txt.gz
526
- - spec/spam_corpus/678.txt.gz
527
- - spec/spam_corpus/68.txt.gz
528
- - spec/spam_corpus/680.txt.gz
529
- - spec/spam_corpus/682.txt.gz
530
- - spec/spam_corpus/684.txt.gz
531
- - spec/spam_corpus/686.txt.gz
532
- - spec/spam_corpus/688.txt.gz
533
- - spec/spam_corpus/690.txt.gz
534
- - spec/spam_corpus/692.txt.gz
535
- - spec/spam_corpus/694.txt.gz
536
- - spec/spam_corpus/696.txt.gz
537
- - spec/spam_corpus/698.txt.gz
538
- - spec/spam_corpus/7.txt.gz
539
- - spec/spam_corpus/70.txt.gz
540
- - spec/spam_corpus/700.txt.gz
541
- - spec/spam_corpus/702.txt.gz
542
- - spec/spam_corpus/704.txt.gz
543
- - spec/spam_corpus/706.txt.gz
544
- - spec/spam_corpus/708.txt.gz
545
- - spec/spam_corpus/710.txt.gz
546
- - spec/spam_corpus/712.txt.gz
547
- - spec/spam_corpus/714.txt.gz
548
- - spec/spam_corpus/716.txt.gz
549
- - spec/spam_corpus/718.txt.gz
550
- - spec/spam_corpus/72.txt.gz
551
- - spec/spam_corpus/720.txt.gz
552
- - spec/spam_corpus/722.txt.gz
553
- - spec/spam_corpus/724.txt.gz
554
- - spec/spam_corpus/726.txt.gz
555
- - spec/spam_corpus/728.txt.gz
556
- - spec/spam_corpus/730.txt.gz
557
- - spec/spam_corpus/732.txt.gz
558
- - spec/spam_corpus/734.txt.gz
559
- - spec/spam_corpus/736.txt.gz
560
- - spec/spam_corpus/738.txt.gz
561
- - spec/spam_corpus/74.txt.gz
562
- - spec/spam_corpus/740.txt.gz
563
- - spec/spam_corpus/742.txt.gz
564
- - spec/spam_corpus/744.txt.gz
565
- - spec/spam_corpus/746.txt.gz
566
- - spec/spam_corpus/748.txt.gz
567
- - spec/spam_corpus/750.txt.gz
568
- - spec/spam_corpus/752.txt.gz
569
- - spec/spam_corpus/754.txt.gz
570
- - spec/spam_corpus/756.txt.gz
571
- - spec/spam_corpus/758.txt.gz
572
- - spec/spam_corpus/76.txt.gz
573
- - spec/spam_corpus/760.txt.gz
574
- - spec/spam_corpus/762.txt.gz
575
- - spec/spam_corpus/764.txt.gz
576
- - spec/spam_corpus/766.txt.gz
577
- - spec/spam_corpus/768.txt.gz
578
- - spec/spam_corpus/770.txt.gz
579
- - spec/spam_corpus/772.txt.gz
580
- - spec/spam_corpus/774.txt.gz
581
- - spec/spam_corpus/776.txt.gz
582
- - spec/spam_corpus/778.txt.gz
583
- - spec/spam_corpus/78.txt.gz
584
- - spec/spam_corpus/780.txt.gz
585
- - spec/spam_corpus/782.txt.gz
586
- - spec/spam_corpus/784.txt.gz
587
- - spec/spam_corpus/786.txt.gz
588
- - spec/spam_corpus/788.txt.gz
589
- - spec/spam_corpus/790.txt.gz
590
- - spec/spam_corpus/792.txt.gz
591
- - spec/spam_corpus/794.txt.gz
592
- - spec/spam_corpus/796.txt.gz
593
- - spec/spam_corpus/798.txt.gz
594
- - spec/spam_corpus/8.txt.gz
595
- - spec/spam_corpus/80.txt.gz
596
- - spec/spam_corpus/800.txt.gz
597
- - spec/spam_corpus/802.txt.gz
598
- - spec/spam_corpus/804.txt.gz
599
- - spec/spam_corpus/806.txt.gz
600
- - spec/spam_corpus/808.txt.gz
601
- - spec/spam_corpus/810.txt.gz
602
- - spec/spam_corpus/812.txt.gz
603
- - spec/spam_corpus/814.txt.gz
604
- - spec/spam_corpus/816.txt.gz
605
- - spec/spam_corpus/818.txt.gz
606
- - spec/spam_corpus/82.txt.gz
607
- - spec/spam_corpus/820.txt.gz
608
- - spec/spam_corpus/822.txt.gz
609
- - spec/spam_corpus/824.txt.gz
610
- - spec/spam_corpus/826.txt.gz
611
- - spec/spam_corpus/828.txt.gz
612
- - spec/spam_corpus/830.txt.gz
613
- - spec/spam_corpus/832.txt.gz
614
- - spec/spam_corpus/834.txt.gz
615
- - spec/spam_corpus/836.txt.gz
616
- - spec/spam_corpus/838.txt.gz
617
- - spec/spam_corpus/84.txt.gz
618
- - spec/spam_corpus/840.txt.gz
619
- - spec/spam_corpus/842.txt.gz
620
- - spec/spam_corpus/844.txt.gz
621
- - spec/spam_corpus/846.txt.gz
622
- - spec/spam_corpus/848.txt.gz
623
- - spec/spam_corpus/850.txt.gz
624
- - spec/spam_corpus/852.txt.gz
625
- - spec/spam_corpus/854.txt.gz
626
- - spec/spam_corpus/856.txt.gz
627
- - spec/spam_corpus/858.txt.gz
628
- - spec/spam_corpus/86.txt.gz
629
- - spec/spam_corpus/860.txt.gz
630
- - spec/spam_corpus/862.txt.gz
631
- - spec/spam_corpus/864.txt.gz
632
- - spec/spam_corpus/866.txt.gz
633
- - spec/spam_corpus/868.txt.gz
634
- - spec/spam_corpus/870.txt.gz
635
- - spec/spam_corpus/872.txt.gz
636
- - spec/spam_corpus/874.txt.gz
637
- - spec/spam_corpus/876.txt.gz
638
- - spec/spam_corpus/878.txt.gz
639
- - spec/spam_corpus/88.txt.gz
640
- - spec/spam_corpus/880.txt.gz
641
- - spec/spam_corpus/882.txt.gz
642
- - spec/spam_corpus/884.txt.gz
643
- - spec/spam_corpus/886.txt.gz
644
- - spec/spam_corpus/888.txt.gz
645
- - spec/spam_corpus/890.txt.gz
646
- - spec/spam_corpus/892.txt.gz
647
- - spec/spam_corpus/894.txt.gz
648
- - spec/spam_corpus/896.txt.gz
649
- - spec/spam_corpus/898.txt.gz
650
- - spec/spam_corpus/9.txt.gz
651
- - spec/spam_corpus/90.txt.gz
652
- - spec/spam_corpus/900.txt.gz
653
- - spec/spam_corpus/902.txt.gz
654
- - spec/spam_corpus/904.txt.gz
655
- - spec/spam_corpus/906.txt.gz
656
- - spec/spam_corpus/908.txt.gz
657
- - spec/spam_corpus/910.txt.gz
658
- - spec/spam_corpus/912.txt.gz
659
- - spec/spam_corpus/914.txt.gz
660
- - spec/spam_corpus/916.txt.gz
661
- - spec/spam_corpus/918.txt.gz
662
- - spec/spam_corpus/92.txt.gz
663
- - spec/spam_corpus/920.txt.gz
664
- - spec/spam_corpus/922.txt.gz
665
- - spec/spam_corpus/924.txt.gz
666
- - spec/spam_corpus/926.txt.gz
667
- - spec/spam_corpus/928.txt.gz
668
- - spec/spam_corpus/930.txt.gz
669
- - spec/spam_corpus/932.txt.gz
670
- - spec/spam_corpus/934.txt.gz
671
- - spec/spam_corpus/936.txt.gz
672
- - spec/spam_corpus/938.txt.gz
673
- - spec/spam_corpus/94.txt.gz
674
- - spec/spam_corpus/940.txt.gz
675
- - spec/spam_corpus/942.txt.gz
676
- - spec/spam_corpus/944.txt.gz
677
- - spec/spam_corpus/946.txt.gz
678
- - spec/spam_corpus/948.txt.gz
679
- - spec/spam_corpus/950.txt.gz
680
- - spec/spam_corpus/952.txt.gz
681
- - spec/spam_corpus/954.txt.gz
682
- - spec/spam_corpus/956.txt.gz
683
- - spec/spam_corpus/958.txt.gz
684
- - spec/spam_corpus/96.txt.gz
685
- - spec/spam_corpus/960.txt.gz
686
- - spec/spam_corpus/962.txt.gz
687
- - spec/spam_corpus/964.txt.gz
688
- - spec/spam_corpus/966.txt.gz
689
- - spec/spam_corpus/968.txt.gz
690
- - spec/spam_corpus/970.txt.gz
691
- - spec/spam_corpus/972.txt.gz
692
- - spec/spam_corpus/974.txt.gz
693
- - spec/spam_corpus/98.txt.gz
694
- - spec/spam_corpus/debugyouradd.com.txt.gz
695
- - spec/spam_corpus/humandesignconsulting.comm.txt.gz
696
- - spec/spam_corpus_spec.rb
697
- - spec/spec.opts
698
- - spec/spec_helper.rb
699
- - tasks/rspec.rake
102
+ - spec/helpers/corpus_helper.rb
103
+ - spec/helpers/filter_helper.rb
104
+ - spec/helpers/spec_helper.rb
105
+ - tasks/test.rake
700
106
  has_rdoc: true
701
107
  homepage: http://github.com/moowahaha/despamilator
702
108
  licenses: []
@@ -708,6 +114,7 @@ rdoc_options:
708
114
  require_paths:
709
115
  - lib
710
116
  required_ruby_version: !ruby/object:Gem::Requirement
117
+ none: false
711
118
  requirements:
712
119
  - - ">="
713
120
  - !ruby/object:Gem::Version
@@ -715,6 +122,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
715
122
  - 0
716
123
  version: "0"
717
124
  required_rubygems_version: !ruby/object:Gem::Requirement
125
+ none: false
718
126
  requirements:
719
127
  - - ">="
720
128
  - !ruby/object:Gem::Version
@@ -724,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
724
132
  requirements: []
725
133
 
726
134
  rubyforge_project: despamilator
727
- rubygems_version: 1.3.6
135
+ rubygems_version: 1.3.7
728
136
  signing_key:
729
137
  specification_version: 3
730
138
  summary: "Despamilator is a plugin based spam detector designed for use on your web forms borne out of two annoyances: Spam being submitted in my web forms and CAPTCHAS being intrusive"