biblicit 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (322) hide show
  1. data/.gitignore +3 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +6 -0
  4. data/LICENSE.TXT +176 -0
  5. data/README.md +120 -0
  6. data/Rakefile +8 -0
  7. data/biblicit.gemspec +33 -0
  8. data/lib/biblicit/cb2bib.rb +83 -0
  9. data/lib/biblicit/citeseer.rb +53 -0
  10. data/lib/biblicit/extractor.rb +37 -0
  11. data/lib/biblicit.rb +6 -0
  12. data/perl/DocFilter/lib/CSXUtil/SafeText.pm +140 -0
  13. data/perl/DocFilter/lib/DocFilter/Config.pm +35 -0
  14. data/perl/DocFilter/lib/DocFilter/Filter.pm +51 -0
  15. data/perl/FileConversionService/README.TXT +11 -0
  16. data/perl/FileConversionService/converters/PDFBox/pdfbox-app-1.7.1.jar +0 -0
  17. data/perl/FileConversionService/lib/CSXUtil/SafeText.pm +140 -0
  18. data/perl/FileConversionService/lib/FileConverter/CheckSum.pm +77 -0
  19. data/perl/FileConversionService/lib/FileConverter/Compression.pm +137 -0
  20. data/perl/FileConversionService/lib/FileConverter/Config.pm +57 -0
  21. data/perl/FileConversionService/lib/FileConverter/Controller.pm +191 -0
  22. data/perl/FileConversionService/lib/FileConverter/JODConverter.pm +61 -0
  23. data/perl/FileConversionService/lib/FileConverter/PDFBox.pm +69 -0
  24. data/perl/FileConversionService/lib/FileConverter/PSConverter.pm +69 -0
  25. data/perl/FileConversionService/lib/FileConverter/PSToText.pm +88 -0
  26. data/perl/FileConversionService/lib/FileConverter/Prescript.pm +68 -0
  27. data/perl/FileConversionService/lib/FileConverter/TET.pm +75 -0
  28. data/perl/FileConversionService/lib/FileConverter/Utils.pm +130 -0
  29. data/perl/HeaderParseService/README.TXT +80 -0
  30. data/perl/HeaderParseService/lib/CSXUtil/SafeText.pm +140 -0
  31. data/perl/HeaderParseService/lib/HeaderParse/API/AssembleXMLMetadata.pm +968 -0
  32. data/perl/HeaderParseService/lib/HeaderParse/API/Function.pm +2016 -0
  33. data/perl/HeaderParseService/lib/HeaderParse/API/LoadInformation.pm +444 -0
  34. data/perl/HeaderParseService/lib/HeaderParse/API/MultiClassChunking.pm +409 -0
  35. data/perl/HeaderParseService/lib/HeaderParse/API/NamePatternMatch.pm +537 -0
  36. data/perl/HeaderParseService/lib/HeaderParse/API/Parser.pm +68 -0
  37. data/perl/HeaderParseService/lib/HeaderParse/API/ParserMethods.pm +1880 -0
  38. data/perl/HeaderParseService/lib/HeaderParse/Config/API_Config.pm +46 -0
  39. data/perl/HeaderParseService/resources/data/EbizHeaders.txt +24330 -0
  40. data/perl/HeaderParseService/resources/data/EbizHeaders.txt.parsed +27506 -0
  41. data/perl/HeaderParseService/resources/data/EbizHeaders.txt.parsed.old +26495 -0
  42. data/perl/HeaderParseService/resources/data/tagged_headers.txt +40668 -0
  43. data/perl/HeaderParseService/resources/data/test_header.txt +31 -0
  44. data/perl/HeaderParseService/resources/data/test_header.txt.parsed +31 -0
  45. data/perl/HeaderParseService/resources/database/50states +60 -0
  46. data/perl/HeaderParseService/resources/database/AddrTopWords.txt +17 -0
  47. data/perl/HeaderParseService/resources/database/AffiTopWords.txt +35 -0
  48. data/perl/HeaderParseService/resources/database/AffiTopWordsAll.txt +533 -0
  49. data/perl/HeaderParseService/resources/database/ChineseSurNames.txt +276 -0
  50. data/perl/HeaderParseService/resources/database/Csurnames.bin +0 -0
  51. data/perl/HeaderParseService/resources/database/Csurnames_spec.bin +0 -0
  52. data/perl/HeaderParseService/resources/database/DomainSuffixes.txt +242 -0
  53. data/perl/HeaderParseService/resources/database/LabeledHeader +18 -0
  54. data/perl/HeaderParseService/resources/database/README +2 -0
  55. data/perl/HeaderParseService/resources/database/TrainMulClassLines +254 -0
  56. data/perl/HeaderParseService/resources/database/TrainMulClassLines1 +510 -0
  57. data/perl/HeaderParseService/resources/database/abstract.txt +1 -0
  58. data/perl/HeaderParseService/resources/database/abstractTopWords +9 -0
  59. data/perl/HeaderParseService/resources/database/addr.txt +28 -0
  60. data/perl/HeaderParseService/resources/database/affi.txt +34 -0
  61. data/perl/HeaderParseService/resources/database/affis.bin +0 -0
  62. data/perl/HeaderParseService/resources/database/all_namewords_spec.bin +0 -0
  63. data/perl/HeaderParseService/resources/database/allnamewords.bin +0 -0
  64. data/perl/HeaderParseService/resources/database/cities_US.txt +4512 -0
  65. data/perl/HeaderParseService/resources/database/cities_world.txt +4463 -0
  66. data/perl/HeaderParseService/resources/database/city.txt +3150 -0
  67. data/perl/HeaderParseService/resources/database/cityname.txt +3151 -0
  68. data/perl/HeaderParseService/resources/database/country_abbr.txt +243 -0
  69. data/perl/HeaderParseService/resources/database/countryname.txt +262 -0
  70. data/perl/HeaderParseService/resources/database/dateTopWords +30 -0
  71. data/perl/HeaderParseService/resources/database/degree.txt +67 -0
  72. data/perl/HeaderParseService/resources/database/email.txt +3 -0
  73. data/perl/HeaderParseService/resources/database/excludeWords.txt +40 -0
  74. data/perl/HeaderParseService/resources/database/female-names +4960 -0
  75. data/perl/HeaderParseService/resources/database/firstNames.txt +8448 -0
  76. data/perl/HeaderParseService/resources/database/firstnames.bin +0 -0
  77. data/perl/HeaderParseService/resources/database/firstnames_spec.bin +0 -0
  78. data/perl/HeaderParseService/resources/database/intro.txt +2 -0
  79. data/perl/HeaderParseService/resources/database/keyword.txt +5 -0
  80. data/perl/HeaderParseService/resources/database/keywordTopWords +7 -0
  81. data/perl/HeaderParseService/resources/database/male-names +3906 -0
  82. data/perl/HeaderParseService/resources/database/middleNames.txt +2 -0
  83. data/perl/HeaderParseService/resources/database/month.txt +35 -0
  84. data/perl/HeaderParseService/resources/database/mul +868 -0
  85. data/perl/HeaderParseService/resources/database/mul.label +869 -0
  86. data/perl/HeaderParseService/resources/database/mul.label.old +869 -0
  87. data/perl/HeaderParseService/resources/database/mul.processed +762 -0
  88. data/perl/HeaderParseService/resources/database/mulAuthor +619 -0
  89. data/perl/HeaderParseService/resources/database/mulClassStat +45 -0
  90. data/perl/HeaderParseService/resources/database/nickname.txt +58 -0
  91. data/perl/HeaderParseService/resources/database/nicknames.bin +0 -0
  92. data/perl/HeaderParseService/resources/database/note.txt +121 -0
  93. data/perl/HeaderParseService/resources/database/page.txt +1 -0
  94. data/perl/HeaderParseService/resources/database/phone.txt +9 -0
  95. data/perl/HeaderParseService/resources/database/postcode.txt +54 -0
  96. data/perl/HeaderParseService/resources/database/pubnum.txt +45 -0
  97. data/perl/HeaderParseService/resources/database/statename.bin +0 -0
  98. data/perl/HeaderParseService/resources/database/statename.txt +73 -0
  99. data/perl/HeaderParseService/resources/database/states_and_abbreviations.txt +118 -0
  100. data/perl/HeaderParseService/resources/database/stopwords +438 -0
  101. data/perl/HeaderParseService/resources/database/stopwords.bin +0 -0
  102. data/perl/HeaderParseService/resources/database/surNames.txt +19613 -0
  103. data/perl/HeaderParseService/resources/database/surnames.bin +0 -0
  104. data/perl/HeaderParseService/resources/database/surnames_spec.bin +0 -0
  105. data/perl/HeaderParseService/resources/database/university_list/A.html +167 -0
  106. data/perl/HeaderParseService/resources/database/university_list/B.html +161 -0
  107. data/perl/HeaderParseService/resources/database/university_list/C.html +288 -0
  108. data/perl/HeaderParseService/resources/database/university_list/D.html +115 -0
  109. data/perl/HeaderParseService/resources/database/university_list/E.html +147 -0
  110. data/perl/HeaderParseService/resources/database/university_list/F.html +112 -0
  111. data/perl/HeaderParseService/resources/database/university_list/G.html +115 -0
  112. data/perl/HeaderParseService/resources/database/university_list/H.html +140 -0
  113. data/perl/HeaderParseService/resources/database/university_list/I.html +138 -0
  114. data/perl/HeaderParseService/resources/database/university_list/J.html +82 -0
  115. data/perl/HeaderParseService/resources/database/university_list/K.html +115 -0
  116. data/perl/HeaderParseService/resources/database/university_list/L.html +131 -0
  117. data/perl/HeaderParseService/resources/database/university_list/M.html +201 -0
  118. data/perl/HeaderParseService/resources/database/university_list/N.html +204 -0
  119. data/perl/HeaderParseService/resources/database/university_list/O.html +89 -0
  120. data/perl/HeaderParseService/resources/database/university_list/P.html +125 -0
  121. data/perl/HeaderParseService/resources/database/university_list/Q.html +49 -0
  122. data/perl/HeaderParseService/resources/database/university_list/R.html +126 -0
  123. data/perl/HeaderParseService/resources/database/university_list/S.html +296 -0
  124. data/perl/HeaderParseService/resources/database/university_list/T.html +156 -0
  125. data/perl/HeaderParseService/resources/database/university_list/U.html +800 -0
  126. data/perl/HeaderParseService/resources/database/university_list/V.html +75 -0
  127. data/perl/HeaderParseService/resources/database/university_list/W.html +144 -0
  128. data/perl/HeaderParseService/resources/database/university_list/WCSelect.gif +0 -0
  129. data/perl/HeaderParseService/resources/database/university_list/X.html +44 -0
  130. data/perl/HeaderParseService/resources/database/university_list/Y.html +53 -0
  131. data/perl/HeaderParseService/resources/database/university_list/Z.html +43 -0
  132. data/perl/HeaderParseService/resources/database/university_list/ae.html +31 -0
  133. data/perl/HeaderParseService/resources/database/university_list/am.html +30 -0
  134. data/perl/HeaderParseService/resources/database/university_list/ar.html +35 -0
  135. data/perl/HeaderParseService/resources/database/university_list/at.html +43 -0
  136. data/perl/HeaderParseService/resources/database/university_list/au.html +82 -0
  137. data/perl/HeaderParseService/resources/database/university_list/bd.html +28 -0
  138. data/perl/HeaderParseService/resources/database/university_list/be.html +41 -0
  139. data/perl/HeaderParseService/resources/database/university_list/bg.html +28 -0
  140. data/perl/HeaderParseService/resources/database/university_list/bh.html +28 -0
  141. data/perl/HeaderParseService/resources/database/university_list/blueribbon.gif +0 -0
  142. data/perl/HeaderParseService/resources/database/university_list/bm.html +28 -0
  143. data/perl/HeaderParseService/resources/database/university_list/bn.html +28 -0
  144. data/perl/HeaderParseService/resources/database/university_list/br.html +66 -0
  145. data/perl/HeaderParseService/resources/database/university_list/ca.html +174 -0
  146. data/perl/HeaderParseService/resources/database/university_list/ch.html +52 -0
  147. data/perl/HeaderParseService/resources/database/university_list/cl.html +40 -0
  148. data/perl/HeaderParseService/resources/database/university_list/cn.html +87 -0
  149. data/perl/HeaderParseService/resources/database/university_list/co.html +39 -0
  150. data/perl/HeaderParseService/resources/database/university_list/cr.html +34 -0
  151. data/perl/HeaderParseService/resources/database/university_list/cy.html +34 -0
  152. data/perl/HeaderParseService/resources/database/university_list/cz.html +44 -0
  153. data/perl/HeaderParseService/resources/database/university_list/de.html +128 -0
  154. data/perl/HeaderParseService/resources/database/university_list/dean-mainlink.jpg +0 -0
  155. data/perl/HeaderParseService/resources/database/university_list/dk.html +42 -0
  156. data/perl/HeaderParseService/resources/database/university_list/ec.html +31 -0
  157. data/perl/HeaderParseService/resources/database/university_list/ee.html +30 -0
  158. data/perl/HeaderParseService/resources/database/university_list/eg.html +29 -0
  159. data/perl/HeaderParseService/resources/database/university_list/es.html +68 -0
  160. data/perl/HeaderParseService/resources/database/university_list/et.html +28 -0
  161. data/perl/HeaderParseService/resources/database/university_list/faq.html +147 -0
  162. data/perl/HeaderParseService/resources/database/university_list/fi.html +49 -0
  163. data/perl/HeaderParseService/resources/database/university_list/fj.html +28 -0
  164. data/perl/HeaderParseService/resources/database/university_list/fo.html +28 -0
  165. data/perl/HeaderParseService/resources/database/university_list/fr.html +106 -0
  166. data/perl/HeaderParseService/resources/database/university_list/geog.html +150 -0
  167. data/perl/HeaderParseService/resources/database/university_list/gr.html +38 -0
  168. data/perl/HeaderParseService/resources/database/university_list/gu.html +28 -0
  169. data/perl/HeaderParseService/resources/database/university_list/hk.html +34 -0
  170. data/perl/HeaderParseService/resources/database/university_list/hr.html +28 -0
  171. data/perl/HeaderParseService/resources/database/university_list/hu.html +46 -0
  172. data/perl/HeaderParseService/resources/database/university_list/id.html +29 -0
  173. data/perl/HeaderParseService/resources/database/university_list/ie.html +49 -0
  174. data/perl/HeaderParseService/resources/database/university_list/il.html +35 -0
  175. data/perl/HeaderParseService/resources/database/university_list/in.html +109 -0
  176. data/perl/HeaderParseService/resources/database/university_list/is.html +32 -0
  177. data/perl/HeaderParseService/resources/database/university_list/it.html +75 -0
  178. data/perl/HeaderParseService/resources/database/university_list/jm.html +28 -0
  179. data/perl/HeaderParseService/resources/database/university_list/jo.html +28 -0
  180. data/perl/HeaderParseService/resources/database/university_list/jp.html +155 -0
  181. data/perl/HeaderParseService/resources/database/university_list/kaplan.gif +0 -0
  182. data/perl/HeaderParseService/resources/database/university_list/kr.html +65 -0
  183. data/perl/HeaderParseService/resources/database/university_list/kw.html +28 -0
  184. data/perl/HeaderParseService/resources/database/university_list/lb.html +28 -0
  185. data/perl/HeaderParseService/resources/database/university_list/linkbw2.gif +0 -0
  186. data/perl/HeaderParseService/resources/database/university_list/lk.html +30 -0
  187. data/perl/HeaderParseService/resources/database/university_list/lt.html +31 -0
  188. data/perl/HeaderParseService/resources/database/university_list/lu.html +34 -0
  189. data/perl/HeaderParseService/resources/database/university_list/lv.html +30 -0
  190. data/perl/HeaderParseService/resources/database/university_list/ma.html +28 -0
  191. data/perl/HeaderParseService/resources/database/university_list/maczynski.gif +0 -0
  192. data/perl/HeaderParseService/resources/database/university_list/mirror.tar +0 -0
  193. data/perl/HeaderParseService/resources/database/university_list/mk.html +29 -0
  194. data/perl/HeaderParseService/resources/database/university_list/mo.html +29 -0
  195. data/perl/HeaderParseService/resources/database/university_list/mseawdm.gif +0 -0
  196. data/perl/HeaderParseService/resources/database/university_list/mt.html +28 -0
  197. data/perl/HeaderParseService/resources/database/university_list/mx.html +68 -0
  198. data/perl/HeaderParseService/resources/database/university_list/my.html +39 -0
  199. data/perl/HeaderParseService/resources/database/university_list/ni.html +28 -0
  200. data/perl/HeaderParseService/resources/database/university_list/nl.html +51 -0
  201. data/perl/HeaderParseService/resources/database/university_list/no.html +56 -0
  202. data/perl/HeaderParseService/resources/database/university_list/nz.html +41 -0
  203. data/perl/HeaderParseService/resources/database/university_list/pa.html +31 -0
  204. data/perl/HeaderParseService/resources/database/university_list/pe.html +40 -0
  205. data/perl/HeaderParseService/resources/database/university_list/ph.html +41 -0
  206. data/perl/HeaderParseService/resources/database/university_list/pl.html +51 -0
  207. data/perl/HeaderParseService/resources/database/university_list/pointcom.gif +0 -0
  208. data/perl/HeaderParseService/resources/database/university_list/pr.html +31 -0
  209. data/perl/HeaderParseService/resources/database/university_list/ps.html +28 -0
  210. data/perl/HeaderParseService/resources/database/university_list/pt.html +45 -0
  211. data/perl/HeaderParseService/resources/database/university_list/recognition.html +69 -0
  212. data/perl/HeaderParseService/resources/database/university_list/results.html +71 -0
  213. data/perl/HeaderParseService/resources/database/university_list/ro.html +38 -0
  214. data/perl/HeaderParseService/resources/database/university_list/ru.html +48 -0
  215. data/perl/HeaderParseService/resources/database/university_list/sd.html +28 -0
  216. data/perl/HeaderParseService/resources/database/university_list/se.html +57 -0
  217. data/perl/HeaderParseService/resources/database/university_list/sg.html +33 -0
  218. data/perl/HeaderParseService/resources/database/university_list/si.html +30 -0
  219. data/perl/HeaderParseService/resources/database/university_list/sk.html +35 -0
  220. data/perl/HeaderParseService/resources/database/university_list/th.html +45 -0
  221. data/perl/HeaderParseService/resources/database/university_list/tr.html +44 -0
  222. data/perl/HeaderParseService/resources/database/university_list/tw.html +76 -0
  223. data/perl/HeaderParseService/resources/database/university_list/ua.html +29 -0
  224. data/perl/HeaderParseService/resources/database/university_list/uk.html +168 -0
  225. data/perl/HeaderParseService/resources/database/university_list/univ-full.html +3166 -0
  226. data/perl/HeaderParseService/resources/database/university_list/univ.html +122 -0
  227. data/perl/HeaderParseService/resources/database/university_list/uy.html +31 -0
  228. data/perl/HeaderParseService/resources/database/university_list/ve.html +34 -0
  229. data/perl/HeaderParseService/resources/database/university_list/yu.html +28 -0
  230. data/perl/HeaderParseService/resources/database/university_list/za.html +46 -0
  231. data/perl/HeaderParseService/resources/database/university_list/zm.html +28 -0
  232. data/perl/HeaderParseService/resources/database/university_list.txt +3025 -0
  233. data/perl/HeaderParseService/resources/database/url.txt +1 -0
  234. data/perl/HeaderParseService/resources/database/webTopWords +225 -0
  235. data/perl/HeaderParseService/resources/database/words +45402 -0
  236. data/perl/HeaderParseService/resources/models/10ContextModelfold1 +369 -0
  237. data/perl/HeaderParseService/resources/models/10Modelfold1 +376 -0
  238. data/perl/HeaderParseService/resources/models/11ContextModelfold1 +400 -0
  239. data/perl/HeaderParseService/resources/models/11Modelfold1 +526 -0
  240. data/perl/HeaderParseService/resources/models/12ContextModelfold1 +510 -0
  241. data/perl/HeaderParseService/resources/models/12Modelfold1 +423 -0
  242. data/perl/HeaderParseService/resources/models/13ContextModelfold1 +364 -0
  243. data/perl/HeaderParseService/resources/models/13Modelfold1 +677 -0
  244. data/perl/HeaderParseService/resources/models/14ContextModelfold1 +459 -0
  245. data/perl/HeaderParseService/resources/models/14Modelfold1 +325 -0
  246. data/perl/HeaderParseService/resources/models/15ContextModelfold1 +340 -0
  247. data/perl/HeaderParseService/resources/models/15Modelfold1 +390 -0
  248. data/perl/HeaderParseService/resources/models/1ContextModelfold1 +668 -0
  249. data/perl/HeaderParseService/resources/models/1Modelfold1 +1147 -0
  250. data/perl/HeaderParseService/resources/models/2ContextModelfold1 +755 -0
  251. data/perl/HeaderParseService/resources/models/2Modelfold1 +796 -0
  252. data/perl/HeaderParseService/resources/models/3ContextModelfold1 +1299 -0
  253. data/perl/HeaderParseService/resources/models/3Modelfold1 +1360 -0
  254. data/perl/HeaderParseService/resources/models/4ContextModelfold1 +1062 -0
  255. data/perl/HeaderParseService/resources/models/4Modelfold1 +993 -0
  256. data/perl/HeaderParseService/resources/models/5ContextModelfold1 +1339 -0
  257. data/perl/HeaderParseService/resources/models/5Modelfold1 +2098 -0
  258. data/perl/HeaderParseService/resources/models/6ContextModelfold1 +888 -0
  259. data/perl/HeaderParseService/resources/models/6Modelfold1 +620 -0
  260. data/perl/HeaderParseService/resources/models/7ContextModelfold1 +257 -0
  261. data/perl/HeaderParseService/resources/models/7Modelfold1 +228 -0
  262. data/perl/HeaderParseService/resources/models/8ContextModelfold1 +677 -0
  263. data/perl/HeaderParseService/resources/models/8Modelfold1 +1871 -0
  264. data/perl/HeaderParseService/resources/models/9ContextModelfold1 +198 -0
  265. data/perl/HeaderParseService/resources/models/9Modelfold1 +170 -0
  266. data/perl/HeaderParseService/resources/models/NameSpaceModel +181 -0
  267. data/perl/HeaderParseService/resources/models/NameSpaceTrainF +347 -0
  268. data/perl/HeaderParseService/resources/models/WrapperBaseFeaDict +13460 -0
  269. data/perl/HeaderParseService/resources/models/WrapperContextFeaDict +14045 -0
  270. data/perl/HeaderParseService/resources/models/WrapperSpaceAuthorFeaDict +510 -0
  271. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test1 +23 -0
  272. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test10 +23 -0
  273. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test11 +23 -0
  274. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test12 +23 -0
  275. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test13 +23 -0
  276. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test14 +23 -0
  277. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test15 +23 -0
  278. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test2 +23 -0
  279. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test3 +23 -0
  280. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test4 +23 -0
  281. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test5 +23 -0
  282. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test6 +23 -0
  283. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test7 +23 -0
  284. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test8 +23 -0
  285. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test9 +23 -0
  286. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test1 +23 -0
  287. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test10 +23 -0
  288. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test11 +23 -0
  289. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test12 +23 -0
  290. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test13 +23 -0
  291. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test14 +23 -0
  292. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test15 +23 -0
  293. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test2 +23 -0
  294. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test3 +23 -0
  295. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test4 +23 -0
  296. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test5 +23 -0
  297. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test6 +23 -0
  298. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test7 +23 -0
  299. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test8 +23 -0
  300. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test9 +23 -0
  301. data/perl/ParsCit/README.TXT +82 -0
  302. data/perl/ParsCit/crfpp/traindata/parsCit.template +60 -0
  303. data/perl/ParsCit/crfpp/traindata/parsCit.train.data +12104 -0
  304. data/perl/ParsCit/crfpp/traindata/tagged_references.txt +500 -0
  305. data/perl/ParsCit/lib/CSXUtil/SafeText.pm +140 -0
  306. data/perl/ParsCit/lib/ParsCit/Citation.pm +462 -0
  307. data/perl/ParsCit/lib/ParsCit/CitationContext.pm +132 -0
  308. data/perl/ParsCit/lib/ParsCit/Config.pm +46 -0
  309. data/perl/ParsCit/lib/ParsCit/Controller.pm +306 -0
  310. data/perl/ParsCit/lib/ParsCit/PostProcess.pm +367 -0
  311. data/perl/ParsCit/lib/ParsCit/PreProcess.pm +333 -0
  312. data/perl/ParsCit/lib/ParsCit/Tr2crfpp.pm +331 -0
  313. data/perl/ParsCit/resources/parsCit.model +0 -0
  314. data/perl/ParsCit/resources/parsCitDict.txt +148783 -0
  315. data/perl/extract.pl +199 -0
  316. data/spec/biblicit/cb2bib_spec.rb +48 -0
  317. data/spec/biblicit/citeseer_spec.rb +40 -0
  318. data/spec/fixtures/pdf/10.1.1.109.4049.pdf +0 -0
  319. data/spec/fixtures/pdf/Bagnoli Watts TAR 2010.pdf +0 -0
  320. data/spec/fixtures/pdf/ICINCO_2010.pdf +0 -0
  321. data/spec/spec_helper.rb +3 -0
  322. metadata +474 -0
@@ -0,0 +1,243 @@
1
+ #http://www.kashrut.com/zemanim/countries/
2
+ #Two Letter Country Abbreviations
3
+
4
+ AD Andorra
5
+ AE United Arab Emirates
6
+ AF Afghanistan
7
+ AG Antigua and Barbuda
8
+ AI Anguilla
9
+ AL Albania
10
+ AM Armenia
11
+ AN Netherlands Antilles
12
+ AO Angola
13
+ AQ Antarctica
14
+ AR Argentina
15
+ AS American Samoa
16
+ AT Austria
17
+ AU Australia
18
+ AW Aruba
19
+ AZ Azerbaijan
20
+ BA Bosnia-Herzegovina
21
+ BB Barbados
22
+ BD Bangladesh
23
+ BE Belgium
24
+ BF Burkina Faso
25
+ BG Bulgaria
26
+ BH Bahrain
27
+ BI Burundi
28
+ BJ Benin
29
+ BM Bermuda
30
+ BN Brunei Darussalam
31
+ BO Bolivia
32
+ BR Brazil
33
+ BS Bahamas
34
+ BT Bhutan
35
+ BV Bouvet Island
36
+ BW Botswana
37
+ BY Belarus
38
+ BZ Belize
39
+ CA Canada
40
+ CC Cocos (Keeling) Isl.
41
+ CF Central African Rep.
42
+ CG Congo
43
+ CH Switzerland
44
+ CI Ivory Coast
45
+ CK Cook Islands
46
+ CL Chile
47
+ CM Cameroon
48
+ CN China
49
+ CO Colombia
50
+ CR Costa Rica
51
+ CS Czechoslovakia
52
+ CU Cuba
53
+ CV Cape Verde
54
+ CX Christmas Island
55
+ CY Cyprus
56
+ CZ Czech Republic
57
+ DE Germany
58
+ DJ Djibouti
59
+ DK Denmark
60
+ DM Dominica
61
+ DO Dominican Republic
62
+ DZ Algeria
63
+ EC Ecuador
64
+ EE Estonia
65
+ EG Egypt
66
+ EH Western Sahara
67
+ ES Spain
68
+ ET Ethiopia
69
+ FI Finland
70
+ FJ Fiji
71
+ FK Falkland Isl.(Malvinas)
72
+ FM Micronesia
73
+ FO Faroe Islands
74
+ FR France
75
+ FX France (European Ter.)
76
+ GA Gabon
77
+ GB Great Britain (UK)
78
+ GD Grenada
79
+ GE Georgia
80
+ GH Ghana
81
+ GI Gibraltar
82
+ GL Greenland
83
+ GP Guadeloupe (Fr.)
84
+ GQ Equatorial Guinea
85
+ GF Guyana (Fr.)
86
+ GM Gambia
87
+ GN Guinea
88
+ GR Greece
89
+ GT Guatemala
90
+ GU Guam (US)
91
+ GW Guinea Bissau
92
+ GY Guyana
93
+ HK Hong Kong
94
+ HM Heard & McDonald Isl.
95
+ HN Honduras
96
+ HR Croatia
97
+ HT Haiti
98
+ HU Hungary
99
+ ID Indonesia
100
+ IE Ireland
101
+ IL Israel
102
+ IN India
103
+ IO British Indian O. Terr.
104
+ IQ Iraq
105
+ IR Iran
106
+ IS Iceland
107
+ IT Italy
108
+ JM Jamaica
109
+ JO Jordan
110
+ JP Japan
111
+ KE Kenya
112
+ KG Kyrgyzstan
113
+ KH Cambodia
114
+ KI Kiribati
115
+ KM Comoros
116
+ KN St.Kitts Nevis Anguilla
117
+ KP Korea (North)
118
+ KR Korea (South)
119
+ KW Kuwait
120
+ KY Cayman Islands
121
+ KZ Kazakhstan
122
+ LA Laos
123
+ LB Lebanon
124
+ LC Saint Lucia
125
+ LI Liechtenstein
126
+ LK Sri Lanka
127
+ LR Liberia
128
+ LS Lesotho
129
+ LT Lithuania
130
+ LU Luxembourg
131
+ LV Latvia
132
+ LY Libya
133
+ MA Morocco
134
+ MC Monaco
135
+ MD Moldavia
136
+ MG Madagascar
137
+ MH Marshall Islands
138
+ ML Mali
139
+ MM Myanmar
140
+ MN Mongolia
141
+ MO Macau
142
+ MP Northern Mariana Isl.
143
+ MQ Martinique (Fr.)
144
+ MR Mauritania
145
+ MS Montserrat
146
+ MT Malta
147
+ MU Mauritius
148
+ MV Maldives
149
+ MW Malawi
150
+ MX Mexico
151
+ MY Malaysia
152
+ MZ Mozambique
153
+ NA Namibia
154
+ NC New Caledonia (Fr.)
155
+ NE Niger
156
+ NF Norfolk Island
157
+ NG Nigeria
158
+ NI Nicaragua
159
+ NL Netherlands
160
+ NO Norway
161
+ NP Nepal
162
+ NR Nauru
163
+ NT Neutral Zone
164
+ NU Niue
165
+ NZ New Zealand
166
+ OM Oman
167
+ PA Panama
168
+ PE Peru
169
+ PF Polynesia (Fr.)
170
+ PG Papua New Guinea
171
+ PH Philippines
172
+ PK Pakistan
173
+ PL Poland
174
+ PM St. Pierre & Miquelon
175
+ PN Pitcairn
176
+ PT Portugal
177
+ PR Puerto Rico (US)
178
+ PW Palau
179
+ PY Paraguay
180
+ QA Qatar
181
+ RE Reunion (Fr.)
182
+ RO Romania
183
+ RU Russian Federation
184
+ RW Rwanda
185
+ SA Saudi Arabia
186
+ SB Solomon Islands
187
+ SC Seychelles
188
+ SD Sudan
189
+ SE Sweden
190
+ SG Singapore
191
+ SH St. Helena
192
+ SI Slovenia
193
+ SJ Svalbard & Jan Mayen Islands
194
+ SK Slovak Republic
195
+ SL Sierra Leone
196
+ SM San Marino
197
+ SN Senegal
198
+ SO Somalia
199
+ SR Suriname
200
+ ST St. Tome and Principe
201
+ SU Soviet Union
202
+ SV El Salvador
203
+ SY Syria
204
+ SZ Swaziland
205
+ TC Turks & Caicos Islands
206
+ TD Chad
207
+ TF French Southern Terr.
208
+ TG Togo
209
+ TH Thailand
210
+ TJ Tajikistan
211
+ TK Tokelau
212
+ TM Turkmenistan
213
+ TN Tunisia
214
+ TO Tonga
215
+ TP East Timor
216
+ TR Turkey
217
+ TT Trinidad & Tobago
218
+ TV Tuvalu
219
+ TW Taiwan
220
+ TZ Tanzania
221
+ UA Ukraine
222
+ UG Uganda
223
+ UK United Kingdom
224
+ UM US Minor outlying Isl.
225
+ US United States
226
+ UY Uruguay
227
+ UZ Uzbekistan
228
+ VA Vatican City State
229
+ VC St.Vincent & Grenadines
230
+ VE Venezuela
231
+ VG Virgin Islands (British)
232
+ VI Virgin Islands (US)
233
+ VN Vietnam
234
+ VU Vanuatu
235
+ WF Wallis & Futuna Islands
236
+ WS Samoa
237
+ YE Yemen
238
+ YU Yugoslavia
239
+ ZA South Africa
240
+ ZM Zambia
241
+ ZR Zaire
242
+ ZW Zimbabwe
243
+
@@ -0,0 +1,262 @@
1
+ Afghanistan
2
+ Albania
3
+ Algeria
4
+ American Samoa
5
+ Andorra
6
+ Angola
7
+ Anguilla
8
+ Antarctica
9
+ Antigua and Barbuda
10
+ Argentina
11
+ Armenia
12
+ Aruba
13
+ Ashmore and Cartier Islands
14
+ Australia
15
+ Austria
16
+ Azerbaijan
17
+ The Bahamas
18
+ Bahrain
19
+ Baker Island
20
+ Bangladesh
21
+ Barbados
22
+ Bassas da India
23
+ Belarus
24
+ Belgium
25
+ Belize
26
+ Benin
27
+ Bermuda
28
+ Bhutan
29
+ Bolivia
30
+ Bosnia and Herzegovina
31
+ Botswana
32
+ Bouvet Island
33
+ Brazil
34
+ British Indian Ocean Territory
35
+ British Virgin Islands
36
+ Brunei
37
+ Bulgaria
38
+ Burkina Faso
39
+ Burma
40
+ Burundi
41
+ Cambodia
42
+ Cameroon
43
+ Canada
44
+ Cape Verde
45
+ Cayman Islands
46
+ Central African Republic
47
+ Chad
48
+ Chile
49
+ China
50
+ Christmas Island
51
+ Clipperton Island
52
+ Cocos (Keeling) Islands
53
+ Colombia
54
+ Comoros
55
+ Democratic Republic of the Congo
56
+ Republic of the Congo
57
+ Cook Islands
58
+ Coral Sea Islands
59
+ Costa Rica
60
+ Cote d'Ivoire
61
+ Croatia
62
+ Cuba
63
+ Cyprus
64
+ Czech Republic
65
+ Denmark
66
+ Djibouti
67
+ Dominica
68
+ Dominican Republic
69
+ Ecuador
70
+ Egypt
71
+ El Salvador
72
+ Equatorial Guinea
73
+ Eritrea
74
+ Estonia
75
+ Ethiopia
76
+ Europa Island
77
+ Falkland Islands (Islas Malvinas)
78
+ Faroe Islands
79
+ Fiji
80
+ Finland
81
+ France
82
+ French Guiana
83
+ French Polynesia
84
+ French Southern and Antarctic Lands
85
+ Gabon
86
+ The Gambia
87
+ Gaza Strip
88
+ Georgia
89
+ Germany
90
+ Ghana
91
+ Gibraltar
92
+ Glorioso Islands
93
+ Greece
94
+ Greenland
95
+ Grenada
96
+ Guadeloupe
97
+ Guam
98
+ Guatemala
99
+ Guernsey
100
+ Guinea
101
+ Guinea-Bissau
102
+ Guyana
103
+ Haiti
104
+ Territory of Heard Island and McDonald Islands
105
+ Holy See
106
+ Honduras
107
+ Hong Kong
108
+ Howland Island
109
+ Hungary
110
+ Iceland
111
+ India
112
+ Republic of Indonesia
113
+ Iran
114
+ Iraq
115
+ Ireland
116
+ Israel
117
+ Italy
118
+ Jamaica
119
+ Jan Mayen
120
+ Japan
121
+ Jarvis Island
122
+ Jersey
123
+ Johnston Atoll
124
+ Jordan
125
+ Juan de Nova Island
126
+ Kazakhstan
127
+ Kenya
128
+ Kingman Reef
129
+ Kiribati
130
+ North Korea
131
+ South Korea
132
+ Kuwait
133
+ Kyrgyzstan
134
+ Laos
135
+ Latvia
136
+ Lebanon
137
+ Lesotho
138
+ Liberia
139
+ Libya
140
+ Liechtenstein
141
+ Lithuania
142
+ Luxembourg
143
+ Macau
144
+ Republika Makedonija
145
+ Madagascar
146
+ Malawi
147
+ Malaysia
148
+ Maldives
149
+ Mali
150
+ Malta
151
+ Isle of Man
152
+ Marshall Islands
153
+ Martinique
154
+ Mauritania
155
+ Mauritius
156
+ Mayotte
157
+ Mexico
158
+ Federated States of Micronesia
159
+ Midway Islands
160
+ Moldova
161
+ Monaco
162
+ Mongolia
163
+ Montserrat
164
+ Morocco
165
+ Mozambique
166
+ Namibia
167
+ Nauru
168
+ Navassa Island
169
+ Nepal
170
+ Netherlands
171
+ Netherlands Antilles
172
+ New Caledonia
173
+ New Zealand
174
+ Nicaragua
175
+ Niger
176
+ Nigeria
177
+ Niue
178
+ Norfolk Island
179
+ Northern Mariana Islands
180
+ Norway
181
+ Oman
182
+ Pakistan
183
+ Palau
184
+ Palmyra Atoll
185
+ Panama
186
+ Papua New Guinea
187
+ Paracel Islands
188
+ Paraguay
189
+ Peru
190
+ Philippines
191
+ Pitcairn Islands
192
+ Poland
193
+ Portugal
194
+ Puerto Rico
195
+ Qatar
196
+ Reunion
197
+ Romania
198
+ Russia
199
+ Rwanda
200
+ Saint Helena
201
+ Saint Kitts and Nevis
202
+ Saint Lucia
203
+ Territorial Collectivity of Saint Pierre and Miquelon
204
+ Saint Vincent and the Grenadines
205
+ Samoa
206
+ San Marino
207
+ Sao Tome and Principe
208
+ Saudi Arabia
209
+ Senegal
210
+ Seychelles
211
+ Sierra Leone
212
+ Singapore
213
+ Slovakia
214
+ Slovenia
215
+ Solomon Islands
216
+ Somalia
217
+ South Africa
218
+ South Georgia and the South Sandwich Islands
219
+ Spain
220
+ Spratly Islands
221
+ Sri Lanka
222
+ Sudan
223
+ Suriname
224
+ Svalbard
225
+ Swaziland
226
+ Sweden
227
+ Switzerland
228
+ Syria
229
+ Taiwan
230
+ Tajikistan
231
+ Tanzania
232
+ Thailand
233
+ Togo
234
+ Tokelau
235
+ Tonga
236
+ Trinidad and Tobago
237
+ Tromelin Island
238
+ Tunisia
239
+ Turkey
240
+ Turkmenistan
241
+ Turks and Caicos Islands
242
+ Tuvalu
243
+ Uganda
244
+ Ukraine
245
+ United Arab Emirates
246
+ United Kingdom
247
+ United States
248
+ USA
249
+ Uruguay
250
+ Uzbekistan
251
+ Vanuatu
252
+ Venezuela
253
+ Vietnam
254
+ Virgin Islands
255
+ Wake Island
256
+ Wallis and Futuna
257
+ West Bank
258
+ Western Sahara
259
+ Yemen
260
+ Yugoslavia
261
+ Zambia
262
+ Zimbabwe
@@ -0,0 +1,30 @@
1
+ 21 october
2
+ 20 june
3
+ 20 january
4
+ 16 august
5
+ 15 july
6
+ 12 march
7
+ 11 february
8
+ 10 april
9
+ 9 december
10
+ 9 september
11
+ 8 november
12
+ 6 revise
13
+ 2 spring
14
+ 2 revise july
15
+ 1 correct dec
16
+ 1 mar
17
+ 1 dec
18
+ 1 received
19
+ 1 sept
20
+ 1 sept.
21
+ 1 correct
22
+ 1 revise august
23
+ 1 jan
24
+ 1 update
25
+ 1 nov
26
+ 1 submit
27
+ 1 juillet
28
+ 1 aug
29
+ 1 revise aug
30
+ 1 received september
@@ -0,0 +1,67 @@
1
+ 20 science
2
+ 17 degree
3
+ 17 requirement
4
+ 16 partial
5
+ 16 thesis
6
+ 16 philosophy
7
+ 15 submit
8
+ 14 partial fulfillment
9
+ 14 fulfillment
10
+ 13 doctor
11
+ 11 professor
12
+ 10 department
13
+ 10 committee
14
+ 9 engineering
15
+ 8 graduate
16
+ 7 electrical
17
+ 6 dissertation
18
+ 6 author
19
+ 5 grant
20
+ 5 institute
21
+ 5 master
22
+ 5 dr
23
+ 4 university
24
+ 4 technology
25
+ 3 distribute
26
+ 3 thesis submit
27
+ 3 document
28
+ 3 copy
29
+ 3 accept
30
+ 3 certify
31
+ 3 part
32
+ 3 dissertation submit
33
+ 3 reproduce
34
+ 3 thesis committee
35
+ 3 thesis document
36
+ 3 chair
37
+ 3 permission
38
+ 2 approve
39
+ 2 paper
40
+ 2 technology science
41
+ 2 supervise
42
+ 2 faculty
43
+ 2 graduate division
44
+ 2 chairman department
45
+ 2 proposal
46
+ 2 thesis proposal
47
+ 2 universit
48
+ 2 public
49
+ 2 technology bombay
50
+ 2 school
51
+ 2 division
52
+ 2 graduate school
53
+ 2 rights reserved
54
+ 2 department committee
55
+ 2 reserved
56
+ 2 tech
57
+ 2 satisfaction
58
+ 2 partial satisfaction
59
+ 2 present
60
+ 2 rights
61
+ 2 thesis supervise
62
+ 2 chairman
63
+ 1 approve thesis
64
+ 1 distribute public
65
+ 1 graduate college
66
+ 1 senior thesis
67
+ 1 bachelor
@@ -0,0 +1,3 @@
1
+ email
2
+ e-mail
3
+
@@ -0,0 +1,40 @@
1
+ John Wiley # publisher
2
+ san # not a valid last name!
3
+ San Francisco # place
4
+ John Benjamins # publisher
5
+ Murray Hill # place
6
+ Morgan Kaufman # publisher
7
+ Morgan Kaufmann # publisher
8
+ Morgan Kauf
9
+ Moffett Field # place
10
+ conf # not a valid last name!
11
+ yahoo # not a valid name of any type!
12
+ florida # a place
13
+ miami # a place
14
+ Washington Post # a newspaper
15
+ D. C. Washington # Washington, DC as a name...
16
+ yeah # not valid!
17
+ Press
18
+ B. C. Vancouver
19
+ D. C. Washington
20
+ John Hopkins
21
+ P. O. Box
22
+ U. C. Berkeley
23
+ U. K. London
24
+ West Lafayette
25
+ Addison Wesley
26
+ Ellis Horwood
27
+ Prentice Hall
28
+ Van Nostrand Reinhold
29
+ W. H. Freeman
30
+ May France
31
+ J Royal Stat
32
+ Link Grammar
33
+ Link Fix
34
+ Montague Grammar
35
+ Pembroke Street
36
+ Penn Wall
37
+ Penn Treebank Wall
38
+ U. S. News
39
+ Red Cross
40
+ Walt Disney