biblicit 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (322) hide show
  1. data/.gitignore +3 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +6 -0
  4. data/LICENSE.TXT +176 -0
  5. data/README.md +120 -0
  6. data/Rakefile +8 -0
  7. data/biblicit.gemspec +33 -0
  8. data/lib/biblicit/cb2bib.rb +83 -0
  9. data/lib/biblicit/citeseer.rb +53 -0
  10. data/lib/biblicit/extractor.rb +37 -0
  11. data/lib/biblicit.rb +6 -0
  12. data/perl/DocFilter/lib/CSXUtil/SafeText.pm +140 -0
  13. data/perl/DocFilter/lib/DocFilter/Config.pm +35 -0
  14. data/perl/DocFilter/lib/DocFilter/Filter.pm +51 -0
  15. data/perl/FileConversionService/README.TXT +11 -0
  16. data/perl/FileConversionService/converters/PDFBox/pdfbox-app-1.7.1.jar +0 -0
  17. data/perl/FileConversionService/lib/CSXUtil/SafeText.pm +140 -0
  18. data/perl/FileConversionService/lib/FileConverter/CheckSum.pm +77 -0
  19. data/perl/FileConversionService/lib/FileConverter/Compression.pm +137 -0
  20. data/perl/FileConversionService/lib/FileConverter/Config.pm +57 -0
  21. data/perl/FileConversionService/lib/FileConverter/Controller.pm +191 -0
  22. data/perl/FileConversionService/lib/FileConverter/JODConverter.pm +61 -0
  23. data/perl/FileConversionService/lib/FileConverter/PDFBox.pm +69 -0
  24. data/perl/FileConversionService/lib/FileConverter/PSConverter.pm +69 -0
  25. data/perl/FileConversionService/lib/FileConverter/PSToText.pm +88 -0
  26. data/perl/FileConversionService/lib/FileConverter/Prescript.pm +68 -0
  27. data/perl/FileConversionService/lib/FileConverter/TET.pm +75 -0
  28. data/perl/FileConversionService/lib/FileConverter/Utils.pm +130 -0
  29. data/perl/HeaderParseService/README.TXT +80 -0
  30. data/perl/HeaderParseService/lib/CSXUtil/SafeText.pm +140 -0
  31. data/perl/HeaderParseService/lib/HeaderParse/API/AssembleXMLMetadata.pm +968 -0
  32. data/perl/HeaderParseService/lib/HeaderParse/API/Function.pm +2016 -0
  33. data/perl/HeaderParseService/lib/HeaderParse/API/LoadInformation.pm +444 -0
  34. data/perl/HeaderParseService/lib/HeaderParse/API/MultiClassChunking.pm +409 -0
  35. data/perl/HeaderParseService/lib/HeaderParse/API/NamePatternMatch.pm +537 -0
  36. data/perl/HeaderParseService/lib/HeaderParse/API/Parser.pm +68 -0
  37. data/perl/HeaderParseService/lib/HeaderParse/API/ParserMethods.pm +1880 -0
  38. data/perl/HeaderParseService/lib/HeaderParse/Config/API_Config.pm +46 -0
  39. data/perl/HeaderParseService/resources/data/EbizHeaders.txt +24330 -0
  40. data/perl/HeaderParseService/resources/data/EbizHeaders.txt.parsed +27506 -0
  41. data/perl/HeaderParseService/resources/data/EbizHeaders.txt.parsed.old +26495 -0
  42. data/perl/HeaderParseService/resources/data/tagged_headers.txt +40668 -0
  43. data/perl/HeaderParseService/resources/data/test_header.txt +31 -0
  44. data/perl/HeaderParseService/resources/data/test_header.txt.parsed +31 -0
  45. data/perl/HeaderParseService/resources/database/50states +60 -0
  46. data/perl/HeaderParseService/resources/database/AddrTopWords.txt +17 -0
  47. data/perl/HeaderParseService/resources/database/AffiTopWords.txt +35 -0
  48. data/perl/HeaderParseService/resources/database/AffiTopWordsAll.txt +533 -0
  49. data/perl/HeaderParseService/resources/database/ChineseSurNames.txt +276 -0
  50. data/perl/HeaderParseService/resources/database/Csurnames.bin +0 -0
  51. data/perl/HeaderParseService/resources/database/Csurnames_spec.bin +0 -0
  52. data/perl/HeaderParseService/resources/database/DomainSuffixes.txt +242 -0
  53. data/perl/HeaderParseService/resources/database/LabeledHeader +18 -0
  54. data/perl/HeaderParseService/resources/database/README +2 -0
  55. data/perl/HeaderParseService/resources/database/TrainMulClassLines +254 -0
  56. data/perl/HeaderParseService/resources/database/TrainMulClassLines1 +510 -0
  57. data/perl/HeaderParseService/resources/database/abstract.txt +1 -0
  58. data/perl/HeaderParseService/resources/database/abstractTopWords +9 -0
  59. data/perl/HeaderParseService/resources/database/addr.txt +28 -0
  60. data/perl/HeaderParseService/resources/database/affi.txt +34 -0
  61. data/perl/HeaderParseService/resources/database/affis.bin +0 -0
  62. data/perl/HeaderParseService/resources/database/all_namewords_spec.bin +0 -0
  63. data/perl/HeaderParseService/resources/database/allnamewords.bin +0 -0
  64. data/perl/HeaderParseService/resources/database/cities_US.txt +4512 -0
  65. data/perl/HeaderParseService/resources/database/cities_world.txt +4463 -0
  66. data/perl/HeaderParseService/resources/database/city.txt +3150 -0
  67. data/perl/HeaderParseService/resources/database/cityname.txt +3151 -0
  68. data/perl/HeaderParseService/resources/database/country_abbr.txt +243 -0
  69. data/perl/HeaderParseService/resources/database/countryname.txt +262 -0
  70. data/perl/HeaderParseService/resources/database/dateTopWords +30 -0
  71. data/perl/HeaderParseService/resources/database/degree.txt +67 -0
  72. data/perl/HeaderParseService/resources/database/email.txt +3 -0
  73. data/perl/HeaderParseService/resources/database/excludeWords.txt +40 -0
  74. data/perl/HeaderParseService/resources/database/female-names +4960 -0
  75. data/perl/HeaderParseService/resources/database/firstNames.txt +8448 -0
  76. data/perl/HeaderParseService/resources/database/firstnames.bin +0 -0
  77. data/perl/HeaderParseService/resources/database/firstnames_spec.bin +0 -0
  78. data/perl/HeaderParseService/resources/database/intro.txt +2 -0
  79. data/perl/HeaderParseService/resources/database/keyword.txt +5 -0
  80. data/perl/HeaderParseService/resources/database/keywordTopWords +7 -0
  81. data/perl/HeaderParseService/resources/database/male-names +3906 -0
  82. data/perl/HeaderParseService/resources/database/middleNames.txt +2 -0
  83. data/perl/HeaderParseService/resources/database/month.txt +35 -0
  84. data/perl/HeaderParseService/resources/database/mul +868 -0
  85. data/perl/HeaderParseService/resources/database/mul.label +869 -0
  86. data/perl/HeaderParseService/resources/database/mul.label.old +869 -0
  87. data/perl/HeaderParseService/resources/database/mul.processed +762 -0
  88. data/perl/HeaderParseService/resources/database/mulAuthor +619 -0
  89. data/perl/HeaderParseService/resources/database/mulClassStat +45 -0
  90. data/perl/HeaderParseService/resources/database/nickname.txt +58 -0
  91. data/perl/HeaderParseService/resources/database/nicknames.bin +0 -0
  92. data/perl/HeaderParseService/resources/database/note.txt +121 -0
  93. data/perl/HeaderParseService/resources/database/page.txt +1 -0
  94. data/perl/HeaderParseService/resources/database/phone.txt +9 -0
  95. data/perl/HeaderParseService/resources/database/postcode.txt +54 -0
  96. data/perl/HeaderParseService/resources/database/pubnum.txt +45 -0
  97. data/perl/HeaderParseService/resources/database/statename.bin +0 -0
  98. data/perl/HeaderParseService/resources/database/statename.txt +73 -0
  99. data/perl/HeaderParseService/resources/database/states_and_abbreviations.txt +118 -0
  100. data/perl/HeaderParseService/resources/database/stopwords +438 -0
  101. data/perl/HeaderParseService/resources/database/stopwords.bin +0 -0
  102. data/perl/HeaderParseService/resources/database/surNames.txt +19613 -0
  103. data/perl/HeaderParseService/resources/database/surnames.bin +0 -0
  104. data/perl/HeaderParseService/resources/database/surnames_spec.bin +0 -0
  105. data/perl/HeaderParseService/resources/database/university_list/A.html +167 -0
  106. data/perl/HeaderParseService/resources/database/university_list/B.html +161 -0
  107. data/perl/HeaderParseService/resources/database/university_list/C.html +288 -0
  108. data/perl/HeaderParseService/resources/database/university_list/D.html +115 -0
  109. data/perl/HeaderParseService/resources/database/university_list/E.html +147 -0
  110. data/perl/HeaderParseService/resources/database/university_list/F.html +112 -0
  111. data/perl/HeaderParseService/resources/database/university_list/G.html +115 -0
  112. data/perl/HeaderParseService/resources/database/university_list/H.html +140 -0
  113. data/perl/HeaderParseService/resources/database/university_list/I.html +138 -0
  114. data/perl/HeaderParseService/resources/database/university_list/J.html +82 -0
  115. data/perl/HeaderParseService/resources/database/university_list/K.html +115 -0
  116. data/perl/HeaderParseService/resources/database/university_list/L.html +131 -0
  117. data/perl/HeaderParseService/resources/database/university_list/M.html +201 -0
  118. data/perl/HeaderParseService/resources/database/university_list/N.html +204 -0
  119. data/perl/HeaderParseService/resources/database/university_list/O.html +89 -0
  120. data/perl/HeaderParseService/resources/database/university_list/P.html +125 -0
  121. data/perl/HeaderParseService/resources/database/university_list/Q.html +49 -0
  122. data/perl/HeaderParseService/resources/database/university_list/R.html +126 -0
  123. data/perl/HeaderParseService/resources/database/university_list/S.html +296 -0
  124. data/perl/HeaderParseService/resources/database/university_list/T.html +156 -0
  125. data/perl/HeaderParseService/resources/database/university_list/U.html +800 -0
  126. data/perl/HeaderParseService/resources/database/university_list/V.html +75 -0
  127. data/perl/HeaderParseService/resources/database/university_list/W.html +144 -0
  128. data/perl/HeaderParseService/resources/database/university_list/WCSelect.gif +0 -0
  129. data/perl/HeaderParseService/resources/database/university_list/X.html +44 -0
  130. data/perl/HeaderParseService/resources/database/university_list/Y.html +53 -0
  131. data/perl/HeaderParseService/resources/database/university_list/Z.html +43 -0
  132. data/perl/HeaderParseService/resources/database/university_list/ae.html +31 -0
  133. data/perl/HeaderParseService/resources/database/university_list/am.html +30 -0
  134. data/perl/HeaderParseService/resources/database/university_list/ar.html +35 -0
  135. data/perl/HeaderParseService/resources/database/university_list/at.html +43 -0
  136. data/perl/HeaderParseService/resources/database/university_list/au.html +82 -0
  137. data/perl/HeaderParseService/resources/database/university_list/bd.html +28 -0
  138. data/perl/HeaderParseService/resources/database/university_list/be.html +41 -0
  139. data/perl/HeaderParseService/resources/database/university_list/bg.html +28 -0
  140. data/perl/HeaderParseService/resources/database/university_list/bh.html +28 -0
  141. data/perl/HeaderParseService/resources/database/university_list/blueribbon.gif +0 -0
  142. data/perl/HeaderParseService/resources/database/university_list/bm.html +28 -0
  143. data/perl/HeaderParseService/resources/database/university_list/bn.html +28 -0
  144. data/perl/HeaderParseService/resources/database/university_list/br.html +66 -0
  145. data/perl/HeaderParseService/resources/database/university_list/ca.html +174 -0
  146. data/perl/HeaderParseService/resources/database/university_list/ch.html +52 -0
  147. data/perl/HeaderParseService/resources/database/university_list/cl.html +40 -0
  148. data/perl/HeaderParseService/resources/database/university_list/cn.html +87 -0
  149. data/perl/HeaderParseService/resources/database/university_list/co.html +39 -0
  150. data/perl/HeaderParseService/resources/database/university_list/cr.html +34 -0
  151. data/perl/HeaderParseService/resources/database/university_list/cy.html +34 -0
  152. data/perl/HeaderParseService/resources/database/university_list/cz.html +44 -0
  153. data/perl/HeaderParseService/resources/database/university_list/de.html +128 -0
  154. data/perl/HeaderParseService/resources/database/university_list/dean-mainlink.jpg +0 -0
  155. data/perl/HeaderParseService/resources/database/university_list/dk.html +42 -0
  156. data/perl/HeaderParseService/resources/database/university_list/ec.html +31 -0
  157. data/perl/HeaderParseService/resources/database/university_list/ee.html +30 -0
  158. data/perl/HeaderParseService/resources/database/university_list/eg.html +29 -0
  159. data/perl/HeaderParseService/resources/database/university_list/es.html +68 -0
  160. data/perl/HeaderParseService/resources/database/university_list/et.html +28 -0
  161. data/perl/HeaderParseService/resources/database/university_list/faq.html +147 -0
  162. data/perl/HeaderParseService/resources/database/university_list/fi.html +49 -0
  163. data/perl/HeaderParseService/resources/database/university_list/fj.html +28 -0
  164. data/perl/HeaderParseService/resources/database/university_list/fo.html +28 -0
  165. data/perl/HeaderParseService/resources/database/university_list/fr.html +106 -0
  166. data/perl/HeaderParseService/resources/database/university_list/geog.html +150 -0
  167. data/perl/HeaderParseService/resources/database/university_list/gr.html +38 -0
  168. data/perl/HeaderParseService/resources/database/university_list/gu.html +28 -0
  169. data/perl/HeaderParseService/resources/database/university_list/hk.html +34 -0
  170. data/perl/HeaderParseService/resources/database/university_list/hr.html +28 -0
  171. data/perl/HeaderParseService/resources/database/university_list/hu.html +46 -0
  172. data/perl/HeaderParseService/resources/database/university_list/id.html +29 -0
  173. data/perl/HeaderParseService/resources/database/university_list/ie.html +49 -0
  174. data/perl/HeaderParseService/resources/database/university_list/il.html +35 -0
  175. data/perl/HeaderParseService/resources/database/university_list/in.html +109 -0
  176. data/perl/HeaderParseService/resources/database/university_list/is.html +32 -0
  177. data/perl/HeaderParseService/resources/database/university_list/it.html +75 -0
  178. data/perl/HeaderParseService/resources/database/university_list/jm.html +28 -0
  179. data/perl/HeaderParseService/resources/database/university_list/jo.html +28 -0
  180. data/perl/HeaderParseService/resources/database/university_list/jp.html +155 -0
  181. data/perl/HeaderParseService/resources/database/university_list/kaplan.gif +0 -0
  182. data/perl/HeaderParseService/resources/database/university_list/kr.html +65 -0
  183. data/perl/HeaderParseService/resources/database/university_list/kw.html +28 -0
  184. data/perl/HeaderParseService/resources/database/university_list/lb.html +28 -0
  185. data/perl/HeaderParseService/resources/database/university_list/linkbw2.gif +0 -0
  186. data/perl/HeaderParseService/resources/database/university_list/lk.html +30 -0
  187. data/perl/HeaderParseService/resources/database/university_list/lt.html +31 -0
  188. data/perl/HeaderParseService/resources/database/university_list/lu.html +34 -0
  189. data/perl/HeaderParseService/resources/database/university_list/lv.html +30 -0
  190. data/perl/HeaderParseService/resources/database/university_list/ma.html +28 -0
  191. data/perl/HeaderParseService/resources/database/university_list/maczynski.gif +0 -0
  192. data/perl/HeaderParseService/resources/database/university_list/mirror.tar +0 -0
  193. data/perl/HeaderParseService/resources/database/university_list/mk.html +29 -0
  194. data/perl/HeaderParseService/resources/database/university_list/mo.html +29 -0
  195. data/perl/HeaderParseService/resources/database/university_list/mseawdm.gif +0 -0
  196. data/perl/HeaderParseService/resources/database/university_list/mt.html +28 -0
  197. data/perl/HeaderParseService/resources/database/university_list/mx.html +68 -0
  198. data/perl/HeaderParseService/resources/database/university_list/my.html +39 -0
  199. data/perl/HeaderParseService/resources/database/university_list/ni.html +28 -0
  200. data/perl/HeaderParseService/resources/database/university_list/nl.html +51 -0
  201. data/perl/HeaderParseService/resources/database/university_list/no.html +56 -0
  202. data/perl/HeaderParseService/resources/database/university_list/nz.html +41 -0
  203. data/perl/HeaderParseService/resources/database/university_list/pa.html +31 -0
  204. data/perl/HeaderParseService/resources/database/university_list/pe.html +40 -0
  205. data/perl/HeaderParseService/resources/database/university_list/ph.html +41 -0
  206. data/perl/HeaderParseService/resources/database/university_list/pl.html +51 -0
  207. data/perl/HeaderParseService/resources/database/university_list/pointcom.gif +0 -0
  208. data/perl/HeaderParseService/resources/database/university_list/pr.html +31 -0
  209. data/perl/HeaderParseService/resources/database/university_list/ps.html +28 -0
  210. data/perl/HeaderParseService/resources/database/university_list/pt.html +45 -0
  211. data/perl/HeaderParseService/resources/database/university_list/recognition.html +69 -0
  212. data/perl/HeaderParseService/resources/database/university_list/results.html +71 -0
  213. data/perl/HeaderParseService/resources/database/university_list/ro.html +38 -0
  214. data/perl/HeaderParseService/resources/database/university_list/ru.html +48 -0
  215. data/perl/HeaderParseService/resources/database/university_list/sd.html +28 -0
  216. data/perl/HeaderParseService/resources/database/university_list/se.html +57 -0
  217. data/perl/HeaderParseService/resources/database/university_list/sg.html +33 -0
  218. data/perl/HeaderParseService/resources/database/university_list/si.html +30 -0
  219. data/perl/HeaderParseService/resources/database/university_list/sk.html +35 -0
  220. data/perl/HeaderParseService/resources/database/university_list/th.html +45 -0
  221. data/perl/HeaderParseService/resources/database/university_list/tr.html +44 -0
  222. data/perl/HeaderParseService/resources/database/university_list/tw.html +76 -0
  223. data/perl/HeaderParseService/resources/database/university_list/ua.html +29 -0
  224. data/perl/HeaderParseService/resources/database/university_list/uk.html +168 -0
  225. data/perl/HeaderParseService/resources/database/university_list/univ-full.html +3166 -0
  226. data/perl/HeaderParseService/resources/database/university_list/univ.html +122 -0
  227. data/perl/HeaderParseService/resources/database/university_list/uy.html +31 -0
  228. data/perl/HeaderParseService/resources/database/university_list/ve.html +34 -0
  229. data/perl/HeaderParseService/resources/database/university_list/yu.html +28 -0
  230. data/perl/HeaderParseService/resources/database/university_list/za.html +46 -0
  231. data/perl/HeaderParseService/resources/database/university_list/zm.html +28 -0
  232. data/perl/HeaderParseService/resources/database/university_list.txt +3025 -0
  233. data/perl/HeaderParseService/resources/database/url.txt +1 -0
  234. data/perl/HeaderParseService/resources/database/webTopWords +225 -0
  235. data/perl/HeaderParseService/resources/database/words +45402 -0
  236. data/perl/HeaderParseService/resources/models/10ContextModelfold1 +369 -0
  237. data/perl/HeaderParseService/resources/models/10Modelfold1 +376 -0
  238. data/perl/HeaderParseService/resources/models/11ContextModelfold1 +400 -0
  239. data/perl/HeaderParseService/resources/models/11Modelfold1 +526 -0
  240. data/perl/HeaderParseService/resources/models/12ContextModelfold1 +510 -0
  241. data/perl/HeaderParseService/resources/models/12Modelfold1 +423 -0
  242. data/perl/HeaderParseService/resources/models/13ContextModelfold1 +364 -0
  243. data/perl/HeaderParseService/resources/models/13Modelfold1 +677 -0
  244. data/perl/HeaderParseService/resources/models/14ContextModelfold1 +459 -0
  245. data/perl/HeaderParseService/resources/models/14Modelfold1 +325 -0
  246. data/perl/HeaderParseService/resources/models/15ContextModelfold1 +340 -0
  247. data/perl/HeaderParseService/resources/models/15Modelfold1 +390 -0
  248. data/perl/HeaderParseService/resources/models/1ContextModelfold1 +668 -0
  249. data/perl/HeaderParseService/resources/models/1Modelfold1 +1147 -0
  250. data/perl/HeaderParseService/resources/models/2ContextModelfold1 +755 -0
  251. data/perl/HeaderParseService/resources/models/2Modelfold1 +796 -0
  252. data/perl/HeaderParseService/resources/models/3ContextModelfold1 +1299 -0
  253. data/perl/HeaderParseService/resources/models/3Modelfold1 +1360 -0
  254. data/perl/HeaderParseService/resources/models/4ContextModelfold1 +1062 -0
  255. data/perl/HeaderParseService/resources/models/4Modelfold1 +993 -0
  256. data/perl/HeaderParseService/resources/models/5ContextModelfold1 +1339 -0
  257. data/perl/HeaderParseService/resources/models/5Modelfold1 +2098 -0
  258. data/perl/HeaderParseService/resources/models/6ContextModelfold1 +888 -0
  259. data/perl/HeaderParseService/resources/models/6Modelfold1 +620 -0
  260. data/perl/HeaderParseService/resources/models/7ContextModelfold1 +257 -0
  261. data/perl/HeaderParseService/resources/models/7Modelfold1 +228 -0
  262. data/perl/HeaderParseService/resources/models/8ContextModelfold1 +677 -0
  263. data/perl/HeaderParseService/resources/models/8Modelfold1 +1871 -0
  264. data/perl/HeaderParseService/resources/models/9ContextModelfold1 +198 -0
  265. data/perl/HeaderParseService/resources/models/9Modelfold1 +170 -0
  266. data/perl/HeaderParseService/resources/models/NameSpaceModel +181 -0
  267. data/perl/HeaderParseService/resources/models/NameSpaceTrainF +347 -0
  268. data/perl/HeaderParseService/resources/models/WrapperBaseFeaDict +13460 -0
  269. data/perl/HeaderParseService/resources/models/WrapperContextFeaDict +14045 -0
  270. data/perl/HeaderParseService/resources/models/WrapperSpaceAuthorFeaDict +510 -0
  271. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test1 +23 -0
  272. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test10 +23 -0
  273. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test11 +23 -0
  274. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test12 +23 -0
  275. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test13 +23 -0
  276. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test14 +23 -0
  277. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test15 +23 -0
  278. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test2 +23 -0
  279. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test3 +23 -0
  280. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test4 +23 -0
  281. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test5 +23 -0
  282. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test6 +23 -0
  283. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test7 +23 -0
  284. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test8 +23 -0
  285. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test9 +23 -0
  286. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test1 +23 -0
  287. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test10 +23 -0
  288. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test11 +23 -0
  289. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test12 +23 -0
  290. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test13 +23 -0
  291. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test14 +23 -0
  292. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test15 +23 -0
  293. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test2 +23 -0
  294. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test3 +23 -0
  295. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test4 +23 -0
  296. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test5 +23 -0
  297. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test6 +23 -0
  298. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test7 +23 -0
  299. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test8 +23 -0
  300. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test9 +23 -0
  301. data/perl/ParsCit/README.TXT +82 -0
  302. data/perl/ParsCit/crfpp/traindata/parsCit.template +60 -0
  303. data/perl/ParsCit/crfpp/traindata/parsCit.train.data +12104 -0
  304. data/perl/ParsCit/crfpp/traindata/tagged_references.txt +500 -0
  305. data/perl/ParsCit/lib/CSXUtil/SafeText.pm +140 -0
  306. data/perl/ParsCit/lib/ParsCit/Citation.pm +462 -0
  307. data/perl/ParsCit/lib/ParsCit/CitationContext.pm +132 -0
  308. data/perl/ParsCit/lib/ParsCit/Config.pm +46 -0
  309. data/perl/ParsCit/lib/ParsCit/Controller.pm +306 -0
  310. data/perl/ParsCit/lib/ParsCit/PostProcess.pm +367 -0
  311. data/perl/ParsCit/lib/ParsCit/PreProcess.pm +333 -0
  312. data/perl/ParsCit/lib/ParsCit/Tr2crfpp.pm +331 -0
  313. data/perl/ParsCit/resources/parsCit.model +0 -0
  314. data/perl/ParsCit/resources/parsCitDict.txt +148783 -0
  315. data/perl/extract.pl +199 -0
  316. data/spec/biblicit/cb2bib_spec.rb +48 -0
  317. data/spec/biblicit/citeseer_spec.rb +40 -0
  318. data/spec/fixtures/pdf/10.1.1.109.4049.pdf +0 -0
  319. data/spec/fixtures/pdf/Bagnoli Watts TAR 2010.pdf +0 -0
  320. data/spec/fixtures/pdf/ICINCO_2010.pdf +0 -0
  321. data/spec/spec_helper.rb +3 -0
  322. metadata +474 -0
@@ -0,0 +1,122 @@
1
+ <HTML>
2
+ <HEAD><TITLE>College and University Home Pages</TITLE>
3
+ <LINK HREF="mailto:cdemello@mit.edu">
4
+ </HEAD>
5
+ <BODY>
6
+
7
+ <H1><A HREF="http://www.eff.org/blueribbon.html"><IMG ALIGN="TOP" ALT="[Blue
8
+ Ribbon]" SRC="blueribbon.gif"></A> College and University Home Pages -
9
+ Alphabetical Listing</H1>
10
+
11
+ <HR>
12
+ (C)Copyright 1995,1996 <A HREF="http://www.mit.edu:8001/people/cdemello/home.html">Christina DeMello</A>. Reproduction and distribution are
13
+ permissible for non-profit purposes only, but no changes are to be
14
+ made to these documents without the author's written consent. <P>
15
+
16
+ <STRONG>July, 1996</STRONG>. This list now has over
17
+ <STRONG>3000</STRONG> entries! Several other sites have given special
18
+ attention to this site, so they are now listed on a <A
19
+ HREF="recognition.html">recognition</A> page. <P>
20
+
21
+ In response to the increasing amount of email I have been getting, I've
22
+ compiled a list of <A HREF="faq.html">frequently asked
23
+ questions</A>. <EM>Please read these answers before sending me email!</EM>
24
+ <P>
25
+
26
+ <STRONG>SURVEY:</STRONG> I'm
27
+ no longer able to keep up with the survey on 'good school homepages', but
28
+ you can see the final <A HREF="results.html">results.</A> Look for an
29
+ updated survey within the next couple of months.<P>
30
+
31
+ For faster access, you may also want to try the following mirror sites:
32
+
33
+ <UL>
34
+
35
+ <LI><A HREF="http://www.mit.edu:8001/people/cdemello/univ.html">Boston, MA (Original Site - always has latest copy)</A>
36
+ <LI><A HREF="http://www.indiana.edu/~librcsd/resource/univ/univ.html">Indiana
37
+ University Libraries, Research Collections and Services Department</A>
38
+ <LI><A HREF="http://www.shu.edu/docs/world/schools/univ.html">Seton Hall University, South Orange, NJ</A>
39
+ <LI><A
40
+ HREF="http://www.mbark.swin.edu.au/mbark/pages/othersites/unis/cdemello/">Swinburne University, Australia</A>
41
+ <LI> <A
42
+ HREF="http://www.rirr.cnuce.cnr.it/universities/univ.html">CNUCE
43
+ Institute Pisa, Italy</A>
44
+ <LI> <A HREF="http://www-mcb.ucdavis.edu/info/CandU/univ.html">UC Davis,
45
+ Davis, CA</A>
46
+ <LI> <A HREF="http://geowww.uibk.ac.at/links/uni-search.html">University of
47
+ Innsbruck, Austria</A> (Searchable Index)
48
+ <LI> <A HREF="http://www.vol.it/mirror/universita/">Video On Line, Italy</A>
49
+
50
+ <LI> <A HREF="http://persia.nic.ir/~elibrary/Univ/univ.html">Institute for
51
+ Theoretical Studies in Physics and Mathematics, Iran</A>
52
+ </UL>
53
+
54
+ <HR>
55
+
56
+
57
+ All schools are currently arranged alphabetically. A <A
58
+ HREF="geog.html">geographical listing</A> is available as well. To
59
+ view colleges under a particular letter, select it below:
60
+
61
+ <P>
62
+
63
+ <A HREF="A.html">A</A>, <A HREF="B.html">B</A>, <A
64
+ HREF="C.html">C</A>, <A HREF="D.html">D</A>, <A HREF="E.html">E</A>,
65
+ <A HREF="F.html">F</A>, <A HREF="G.html">G</A>, <A
66
+ HREF="H.html">H</A>, <A HREF="I.html">I</A>, <A HREF="J.html">J</A>,
67
+ <A HREF="K.html">K</A>, <A HREF="L.html">L</A>, <A
68
+ HREF="M.html">M</A>, <A HREF="N.html">N</A>, <A HREF="O.html">O</A>,
69
+ <A HREF="P.html">P</A>, <A HREF="Q.html">Q</A>, <A
70
+ HREF="R.html">R</A>, <A HREF="S.html">S</A>, <A HREF="T.html">T</A>,
71
+ <A HREF="U.html">U</A>, <A HREF="V.html">V</A>, <A
72
+ HREF="W.html">W</A>, <A HREF="X.html">X</A>, <A
73
+ HREF="Y.html">Y</A>,<A HREF="Z.html">Z</A>
74
+
75
+ <P>
76
+
77
+ You can also access:
78
+ <UL>
79
+ <LI> <A HREF="univ-full.html">Full list of Universities</A>
80
+
81
+ <LI> <A HREF="geog.html">Geographical Listings of Universities</A>
82
+
83
+ <LI> <A HREF="faq.html">Frequently Asked Questions and their Answers</A>
84
+
85
+ <LI> <A HREF="results.html">Survey Results</A>
86
+
87
+ <LI> <A HREF="mirror.tar.Z">Compressed Tar File of All Files</A>
88
+
89
+ <LI> <A HREF="mirror.tar.gz">Gzipped Tar File of All Files</A>
90
+ </UL>
91
+
92
+ <HR>
93
+
94
+ These lists contain home pages (please do not send non-http URLs) for
95
+ universities and colleges all over the world. They do not contain
96
+ departmental pages unless the page in question did an adequate job of
97
+ providing general campus information as well, and the lists only include one
98
+ link per school. You can get more information about these lists in the new
99
+ <A HREF="faq.html">FAQ</A>. For a list of US Schools only, see Mike
100
+ Conlon's <A
101
+ HREF="http://www.clas.ufl.edu/CLAS/american-universities.html">list of
102
+ American Universities</A>. <P>
103
+
104
+ You can now send updates to the lists via <A
105
+ HREF="http://www.mit.edu:8001/people/cdemello/new-school.html">a
106
+ form</A>. If you do not have forms support, please send additions,
107
+ corrections, and comments to <EM><A
108
+ HREF="mailto:cdemello@mit.edu">cdemello@mit.edu</A></EM>. Please allow some
109
+ time for a response as I no longer have a direct net connection at work
110
+ every day. Also, this list may be updated more quickly than I can respond
111
+ personally to email.<P>
112
+
113
+ <HR>
114
+ <EM>cdemello@mit.edu, cdemello@us.oracle.com</EM>
115
+
116
+ <h5>Last modified: Mon Jul 15 00:14:26 1996</h5>
117
+ </BODY>
118
+ </HTML>
119
+
120
+
121
+
122
+
@@ -0,0 +1,31 @@
1
+ <HTML><HEAD><TITLE>Colleges and Universities - Uruguay </TITLE>
2
+ <LINK HREF="mailto:cdemello@mit.edu">
3
+ </HEAD><BODY><H1><A HREF="http://www.eff.org/blueribbon.html"><IMG ALIGN="TOP" ALT="[Blue Ribbon]" SRC="blueribbon.gif"></A>Colleges and Universities - Uruguay </H1>
4
+ <HR>
5
+
6
+ (C)Copyright 1995,1996 Christina DeMello. Reproduction and distribution are
7
+ permissible for non-profit purposes only, but no changes are to be
8
+ made to this document without the author's written consent. <P>
9
+
10
+ Please send additions, corrections, and comments to <EM><A
11
+ HREF="mailto:cdemello@mit.edu">cdemello@mit.edu</A></EM>. Please
12
+ give me a few days to respond as I no longer have a
13
+ direct net connection at work every day. Also, this list may be
14
+ updated more quickly than I can respond personally to email.<P> <HR> <OL>
15
+
16
+ <h5>Last modified: Thu Jul 11 20:53:17 1996</h5>
17
+
18
+ <LI> <A HREF="http://www.ort.edu.uy/">ORT Uruguay</A>
19
+ <LI> <A HREF="http://www.rau.edu.uy/universidad">Universidad de la Republica</A>
20
+ <LI> <A HREF="http://fisica.edu.uy/">Universidad de la Republica Oriental del Uruguay</A>
21
+ <LI> <A HREF="http://www.chasque.apc.org/pasecco/maryland/">University of Maryland, Montevideo Uruguay</A>
22
+ </OL>
23
+ <A HREF="geog.html">Return to Top Level</A>
24
+ <HR>
25
+ <EM><A HREF="mailto:cdemello@mit.edu">cdemello@mit.edu, cdemello@us.oracle.com</A></EM>
26
+ <P>
27
+ Last updated:
28
+
29
+ Mon Jul 15 00:42:41 PDT 1996
30
+ </BODY>
31
+ </HTML>
@@ -0,0 +1,34 @@
1
+ <HTML><HEAD><TITLE>Colleges and Universities - Venezuela </TITLE>
2
+ <LINK HREF="mailto:cdemello@mit.edu">
3
+ </HEAD><BODY><H1><A HREF="http://www.eff.org/blueribbon.html"><IMG ALIGN="TOP" ALT="[Blue Ribbon]" SRC="blueribbon.gif"></A>Colleges and Universities - Venezuela </H1>
4
+ <HR>
5
+
6
+ (C)Copyright 1995,1996 Christina DeMello. Reproduction and distribution are
7
+ permissible for non-profit purposes only, but no changes are to be
8
+ made to this document without the author's written consent. <P>
9
+
10
+ Please send additions, corrections, and comments to <EM><A
11
+ HREF="mailto:cdemello@mit.edu">cdemello@mit.edu</A></EM>. Please
12
+ give me a few days to respond as I no longer have a
13
+ direct net connection at work every day. Also, this list may be
14
+ updated more quickly than I can respond personally to email.<P> <HR> <OL>
15
+
16
+ <h5>Last modified: Thu Jul 11 20:53:17 1996</h5>
17
+
18
+ <LI> <A HREF="http://www.ucab.edu.ve/">Universidad Cat&oacute;lica Andr&eacute;s Bello</A>
19
+ <LI> <A HREF="http://www.ucv.edu.ve/">Universidad Central de Venezuela</A>
20
+ <LI> <A HREF="http://ourworld.compuserve.com/homepages/jzozaya/unimet.htm">Universidad Metropolitana</A>
21
+ <LI> <A HREF="http://www.une.edu.ve/">Universidad Nueva Esparta</A>
22
+ <LI> <A HREF="http://www.usb.ve/">Universidad Sim&oacute;n Bol&iacute;var (USB)</A>
23
+ <LI> <A HREF="http://mozart.ing.ula.ve/ula.html">Universidad de Los Andes (ULA)</A>
24
+ <LI> <A HREF="http://www.luz.ve/">Universidad del Zulia</A>
25
+ </OL>
26
+ <A HREF="geog.html">Return to Top Level</A>
27
+ <HR>
28
+ <EM><A HREF="mailto:cdemello@mit.edu">cdemello@mit.edu, cdemello@us.oracle.com</A></EM>
29
+ <P>
30
+ Last updated:
31
+
32
+ Mon Jul 15 00:42:42 PDT 1996
33
+ </BODY>
34
+ </HTML>
@@ -0,0 +1,28 @@
1
+ <HTML><HEAD><TITLE>Colleges and Universities - Yugoslavia </TITLE>
2
+ <LINK HREF="mailto:cdemello@mit.edu">
3
+ </HEAD><BODY><H1><A HREF="http://www.eff.org/blueribbon.html"><IMG ALIGN="TOP" ALT="[Blue Ribbon]" SRC="blueribbon.gif"></A>Colleges and Universities - Yugoslavia </H1>
4
+ <HR>
5
+
6
+ (C)Copyright 1995,1996 Christina DeMello. Reproduction and distribution are
7
+ permissible for non-profit purposes only, but no changes are to be
8
+ made to this document without the author's written consent. <P>
9
+
10
+ Please send additions, corrections, and comments to <EM><A
11
+ HREF="mailto:cdemello@mit.edu">cdemello@mit.edu</A></EM>. Please
12
+ give me a few days to respond as I no longer have a
13
+ direct net connection at work every day. Also, this list may be
14
+ updated more quickly than I can respond personally to email.<P> <HR> <OL>
15
+
16
+ <h5>Last modified: Thu Jul 11 20:53:17 1996</h5>
17
+
18
+ <LI> <A HREF="http://147.91.1.5/bu/">Univerzitet u Beogradu</A>
19
+ </OL>
20
+ <A HREF="geog.html">Return to Top Level</A>
21
+ <HR>
22
+ <EM><A HREF="mailto:cdemello@mit.edu">cdemello@mit.edu, cdemello@us.oracle.com</A></EM>
23
+ <P>
24
+ Last updated:
25
+
26
+ Mon Jul 15 00:42:43 PDT 1996
27
+ </BODY>
28
+ </HTML>
@@ -0,0 +1,46 @@
1
+ <HTML><HEAD><TITLE>Colleges and Universities - South Africa </TITLE>
2
+ <LINK HREF="mailto:cdemello@mit.edu">
3
+ </HEAD><BODY><H1><A HREF="http://www.eff.org/blueribbon.html"><IMG ALIGN="TOP" ALT="[Blue Ribbon]" SRC="blueribbon.gif"></A>Colleges and Universities - South Africa </H1>
4
+ <HR>
5
+
6
+ (C)Copyright 1995,1996 Christina DeMello. Reproduction and distribution are
7
+ permissible for non-profit purposes only, but no changes are to be
8
+ made to this document without the author's written consent. <P>
9
+
10
+ Please send additions, corrections, and comments to <EM><A
11
+ HREF="mailto:cdemello@mit.edu">cdemello@mit.edu</A></EM>. Please
12
+ give me a few days to respond as I no longer have a
13
+ direct net connection at work every day. Also, this list may be
14
+ updated more quickly than I can respond personally to email.<P> <HR> <OL>
15
+
16
+ <h5>Last modified: Thu Jul 11 20:53:17 1996</h5>
17
+
18
+ <LI> <A HREF="http://lin01.global.co.za/business/bmtc/">Business Management Training College of Southern Africa</A>
19
+ <LI> <A HREF="http://www.puk.ac.za/">Potchefstroom University for Christian Higher Education</A>
20
+ <LI> <A HREF="http://www.rau.ac.za/">Rand Afrikaans University</A>
21
+ <LI> <A HREF="http://www.ru.ac.za/">Rhodes University</A>
22
+ <LI> <A HREF="http://www.trsa.ac.za/">Technikon Southern Africa</A>
23
+ <LI> <A HREF="http://www.uct.ac.za/">University of Cape Town</A>
24
+ <LI> <A HREF="http://www.udw.ac.za/">University of Durban-Westville</A>
25
+ <LI> <A HREF="http://www.und.ac.za/">University of Natal (Durban)</A>
26
+ <LI> <A HREF="http://www.unp.ac.za/">University of Natal, Pietermaritzburg</A>
27
+ <LI> <A HREF="http://www.upe.ac.za/">University of Port Elizabeth</A>
28
+ <LI> <A HREF="http://www.up.ac.za/">University of Pretoria</A>
29
+ <LI> <A HREF="http://www.unisa.ac.za/">University of South Africa</A>
30
+ <LI> <A HREF="http://www.sun.ac.za">University of Stellenbosch</A>
31
+ <LI> <A HREF="http://www.utr.ac.za/">University of Transkei</A>
32
+ <LI> <A HREF="http://www.wits.ac.za/">University of Witwatersrand</A>
33
+ <LI> <A HREF="http://www.uzulu.ac.za/">University of Zululand</A>
34
+ <LI> <A HREF="http://www.uovs.ac.za/">University of the Orange Free State</A>
35
+ <LI> <A HREF="http://www.uwc.ac.za/">University of the Western Cape</A>
36
+ <LI> <A HREF="http://www.vista.ac.za/">Vista University</A>
37
+ </OL>
38
+ <A HREF="geog.html">Return to Top Level</A>
39
+ <HR>
40
+ <EM><A HREF="mailto:cdemello@mit.edu">cdemello@mit.edu, cdemello@us.oracle.com</A></EM>
41
+ <P>
42
+ Last updated:
43
+
44
+ Mon Jul 15 00:42:44 PDT 1996
45
+ </BODY>
46
+ </HTML>
@@ -0,0 +1,28 @@
1
+ <HTML><HEAD><TITLE>Colleges and Universities - Zambia </TITLE>
2
+ <LINK HREF="mailto:cdemello@mit.edu">
3
+ </HEAD><BODY><H1><A HREF="http://www.eff.org/blueribbon.html"><IMG ALIGN="TOP" ALT="[Blue Ribbon]" SRC="blueribbon.gif"></A>Colleges and Universities - Zambia </H1>
4
+ <HR>
5
+
6
+ (C)Copyright 1995,1996 Christina DeMello. Reproduction and distribution are
7
+ permissible for non-profit purposes only, but no changes are to be
8
+ made to this document without the author's written consent. <P>
9
+
10
+ Please send additions, corrections, and comments to <EM><A
11
+ HREF="mailto:cdemello@mit.edu">cdemello@mit.edu</A></EM>. Please
12
+ give me a few days to respond as I no longer have a
13
+ direct net connection at work every day. Also, this list may be
14
+ updated more quickly than I can respond personally to email.<P> <HR> <OL>
15
+
16
+ <h5>Last modified: Thu Jul 11 20:53:17 1996</h5>
17
+
18
+ <LI> <A HREF="http://www.zamnet.zm/unza/unza.html">University of Zambia</A>
19
+ </OL>
20
+ <A HREF="geog.html">Return to Top Level</A>
21
+ <HR>
22
+ <EM><A HREF="mailto:cdemello@mit.edu">cdemello@mit.edu, cdemello@us.oracle.com</A></EM>
23
+ <P>
24
+ Last updated:
25
+
26
+ Mon Jul 15 00:42:45 PDT 1996
27
+ </BODY>
28
+ </HTML>