biblicit 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (322) hide show
  1. data/.gitignore +3 -0
  2. data/.rspec +1 -0
  3. data/Gemfile +6 -0
  4. data/LICENSE.TXT +176 -0
  5. data/README.md +120 -0
  6. data/Rakefile +8 -0
  7. data/biblicit.gemspec +33 -0
  8. data/lib/biblicit/cb2bib.rb +83 -0
  9. data/lib/biblicit/citeseer.rb +53 -0
  10. data/lib/biblicit/extractor.rb +37 -0
  11. data/lib/biblicit.rb +6 -0
  12. data/perl/DocFilter/lib/CSXUtil/SafeText.pm +140 -0
  13. data/perl/DocFilter/lib/DocFilter/Config.pm +35 -0
  14. data/perl/DocFilter/lib/DocFilter/Filter.pm +51 -0
  15. data/perl/FileConversionService/README.TXT +11 -0
  16. data/perl/FileConversionService/converters/PDFBox/pdfbox-app-1.7.1.jar +0 -0
  17. data/perl/FileConversionService/lib/CSXUtil/SafeText.pm +140 -0
  18. data/perl/FileConversionService/lib/FileConverter/CheckSum.pm +77 -0
  19. data/perl/FileConversionService/lib/FileConverter/Compression.pm +137 -0
  20. data/perl/FileConversionService/lib/FileConverter/Config.pm +57 -0
  21. data/perl/FileConversionService/lib/FileConverter/Controller.pm +191 -0
  22. data/perl/FileConversionService/lib/FileConverter/JODConverter.pm +61 -0
  23. data/perl/FileConversionService/lib/FileConverter/PDFBox.pm +69 -0
  24. data/perl/FileConversionService/lib/FileConverter/PSConverter.pm +69 -0
  25. data/perl/FileConversionService/lib/FileConverter/PSToText.pm +88 -0
  26. data/perl/FileConversionService/lib/FileConverter/Prescript.pm +68 -0
  27. data/perl/FileConversionService/lib/FileConverter/TET.pm +75 -0
  28. data/perl/FileConversionService/lib/FileConverter/Utils.pm +130 -0
  29. data/perl/HeaderParseService/README.TXT +80 -0
  30. data/perl/HeaderParseService/lib/CSXUtil/SafeText.pm +140 -0
  31. data/perl/HeaderParseService/lib/HeaderParse/API/AssembleXMLMetadata.pm +968 -0
  32. data/perl/HeaderParseService/lib/HeaderParse/API/Function.pm +2016 -0
  33. data/perl/HeaderParseService/lib/HeaderParse/API/LoadInformation.pm +444 -0
  34. data/perl/HeaderParseService/lib/HeaderParse/API/MultiClassChunking.pm +409 -0
  35. data/perl/HeaderParseService/lib/HeaderParse/API/NamePatternMatch.pm +537 -0
  36. data/perl/HeaderParseService/lib/HeaderParse/API/Parser.pm +68 -0
  37. data/perl/HeaderParseService/lib/HeaderParse/API/ParserMethods.pm +1880 -0
  38. data/perl/HeaderParseService/lib/HeaderParse/Config/API_Config.pm +46 -0
  39. data/perl/HeaderParseService/resources/data/EbizHeaders.txt +24330 -0
  40. data/perl/HeaderParseService/resources/data/EbizHeaders.txt.parsed +27506 -0
  41. data/perl/HeaderParseService/resources/data/EbizHeaders.txt.parsed.old +26495 -0
  42. data/perl/HeaderParseService/resources/data/tagged_headers.txt +40668 -0
  43. data/perl/HeaderParseService/resources/data/test_header.txt +31 -0
  44. data/perl/HeaderParseService/resources/data/test_header.txt.parsed +31 -0
  45. data/perl/HeaderParseService/resources/database/50states +60 -0
  46. data/perl/HeaderParseService/resources/database/AddrTopWords.txt +17 -0
  47. data/perl/HeaderParseService/resources/database/AffiTopWords.txt +35 -0
  48. data/perl/HeaderParseService/resources/database/AffiTopWordsAll.txt +533 -0
  49. data/perl/HeaderParseService/resources/database/ChineseSurNames.txt +276 -0
  50. data/perl/HeaderParseService/resources/database/Csurnames.bin +0 -0
  51. data/perl/HeaderParseService/resources/database/Csurnames_spec.bin +0 -0
  52. data/perl/HeaderParseService/resources/database/DomainSuffixes.txt +242 -0
  53. data/perl/HeaderParseService/resources/database/LabeledHeader +18 -0
  54. data/perl/HeaderParseService/resources/database/README +2 -0
  55. data/perl/HeaderParseService/resources/database/TrainMulClassLines +254 -0
  56. data/perl/HeaderParseService/resources/database/TrainMulClassLines1 +510 -0
  57. data/perl/HeaderParseService/resources/database/abstract.txt +1 -0
  58. data/perl/HeaderParseService/resources/database/abstractTopWords +9 -0
  59. data/perl/HeaderParseService/resources/database/addr.txt +28 -0
  60. data/perl/HeaderParseService/resources/database/affi.txt +34 -0
  61. data/perl/HeaderParseService/resources/database/affis.bin +0 -0
  62. data/perl/HeaderParseService/resources/database/all_namewords_spec.bin +0 -0
  63. data/perl/HeaderParseService/resources/database/allnamewords.bin +0 -0
  64. data/perl/HeaderParseService/resources/database/cities_US.txt +4512 -0
  65. data/perl/HeaderParseService/resources/database/cities_world.txt +4463 -0
  66. data/perl/HeaderParseService/resources/database/city.txt +3150 -0
  67. data/perl/HeaderParseService/resources/database/cityname.txt +3151 -0
  68. data/perl/HeaderParseService/resources/database/country_abbr.txt +243 -0
  69. data/perl/HeaderParseService/resources/database/countryname.txt +262 -0
  70. data/perl/HeaderParseService/resources/database/dateTopWords +30 -0
  71. data/perl/HeaderParseService/resources/database/degree.txt +67 -0
  72. data/perl/HeaderParseService/resources/database/email.txt +3 -0
  73. data/perl/HeaderParseService/resources/database/excludeWords.txt +40 -0
  74. data/perl/HeaderParseService/resources/database/female-names +4960 -0
  75. data/perl/HeaderParseService/resources/database/firstNames.txt +8448 -0
  76. data/perl/HeaderParseService/resources/database/firstnames.bin +0 -0
  77. data/perl/HeaderParseService/resources/database/firstnames_spec.bin +0 -0
  78. data/perl/HeaderParseService/resources/database/intro.txt +2 -0
  79. data/perl/HeaderParseService/resources/database/keyword.txt +5 -0
  80. data/perl/HeaderParseService/resources/database/keywordTopWords +7 -0
  81. data/perl/HeaderParseService/resources/database/male-names +3906 -0
  82. data/perl/HeaderParseService/resources/database/middleNames.txt +2 -0
  83. data/perl/HeaderParseService/resources/database/month.txt +35 -0
  84. data/perl/HeaderParseService/resources/database/mul +868 -0
  85. data/perl/HeaderParseService/resources/database/mul.label +869 -0
  86. data/perl/HeaderParseService/resources/database/mul.label.old +869 -0
  87. data/perl/HeaderParseService/resources/database/mul.processed +762 -0
  88. data/perl/HeaderParseService/resources/database/mulAuthor +619 -0
  89. data/perl/HeaderParseService/resources/database/mulClassStat +45 -0
  90. data/perl/HeaderParseService/resources/database/nickname.txt +58 -0
  91. data/perl/HeaderParseService/resources/database/nicknames.bin +0 -0
  92. data/perl/HeaderParseService/resources/database/note.txt +121 -0
  93. data/perl/HeaderParseService/resources/database/page.txt +1 -0
  94. data/perl/HeaderParseService/resources/database/phone.txt +9 -0
  95. data/perl/HeaderParseService/resources/database/postcode.txt +54 -0
  96. data/perl/HeaderParseService/resources/database/pubnum.txt +45 -0
  97. data/perl/HeaderParseService/resources/database/statename.bin +0 -0
  98. data/perl/HeaderParseService/resources/database/statename.txt +73 -0
  99. data/perl/HeaderParseService/resources/database/states_and_abbreviations.txt +118 -0
  100. data/perl/HeaderParseService/resources/database/stopwords +438 -0
  101. data/perl/HeaderParseService/resources/database/stopwords.bin +0 -0
  102. data/perl/HeaderParseService/resources/database/surNames.txt +19613 -0
  103. data/perl/HeaderParseService/resources/database/surnames.bin +0 -0
  104. data/perl/HeaderParseService/resources/database/surnames_spec.bin +0 -0
  105. data/perl/HeaderParseService/resources/database/university_list/A.html +167 -0
  106. data/perl/HeaderParseService/resources/database/university_list/B.html +161 -0
  107. data/perl/HeaderParseService/resources/database/university_list/C.html +288 -0
  108. data/perl/HeaderParseService/resources/database/university_list/D.html +115 -0
  109. data/perl/HeaderParseService/resources/database/university_list/E.html +147 -0
  110. data/perl/HeaderParseService/resources/database/university_list/F.html +112 -0
  111. data/perl/HeaderParseService/resources/database/university_list/G.html +115 -0
  112. data/perl/HeaderParseService/resources/database/university_list/H.html +140 -0
  113. data/perl/HeaderParseService/resources/database/university_list/I.html +138 -0
  114. data/perl/HeaderParseService/resources/database/university_list/J.html +82 -0
  115. data/perl/HeaderParseService/resources/database/university_list/K.html +115 -0
  116. data/perl/HeaderParseService/resources/database/university_list/L.html +131 -0
  117. data/perl/HeaderParseService/resources/database/university_list/M.html +201 -0
  118. data/perl/HeaderParseService/resources/database/university_list/N.html +204 -0
  119. data/perl/HeaderParseService/resources/database/university_list/O.html +89 -0
  120. data/perl/HeaderParseService/resources/database/university_list/P.html +125 -0
  121. data/perl/HeaderParseService/resources/database/university_list/Q.html +49 -0
  122. data/perl/HeaderParseService/resources/database/university_list/R.html +126 -0
  123. data/perl/HeaderParseService/resources/database/university_list/S.html +296 -0
  124. data/perl/HeaderParseService/resources/database/university_list/T.html +156 -0
  125. data/perl/HeaderParseService/resources/database/university_list/U.html +800 -0
  126. data/perl/HeaderParseService/resources/database/university_list/V.html +75 -0
  127. data/perl/HeaderParseService/resources/database/university_list/W.html +144 -0
  128. data/perl/HeaderParseService/resources/database/university_list/WCSelect.gif +0 -0
  129. data/perl/HeaderParseService/resources/database/university_list/X.html +44 -0
  130. data/perl/HeaderParseService/resources/database/university_list/Y.html +53 -0
  131. data/perl/HeaderParseService/resources/database/university_list/Z.html +43 -0
  132. data/perl/HeaderParseService/resources/database/university_list/ae.html +31 -0
  133. data/perl/HeaderParseService/resources/database/university_list/am.html +30 -0
  134. data/perl/HeaderParseService/resources/database/university_list/ar.html +35 -0
  135. data/perl/HeaderParseService/resources/database/university_list/at.html +43 -0
  136. data/perl/HeaderParseService/resources/database/university_list/au.html +82 -0
  137. data/perl/HeaderParseService/resources/database/university_list/bd.html +28 -0
  138. data/perl/HeaderParseService/resources/database/university_list/be.html +41 -0
  139. data/perl/HeaderParseService/resources/database/university_list/bg.html +28 -0
  140. data/perl/HeaderParseService/resources/database/university_list/bh.html +28 -0
  141. data/perl/HeaderParseService/resources/database/university_list/blueribbon.gif +0 -0
  142. data/perl/HeaderParseService/resources/database/university_list/bm.html +28 -0
  143. data/perl/HeaderParseService/resources/database/university_list/bn.html +28 -0
  144. data/perl/HeaderParseService/resources/database/university_list/br.html +66 -0
  145. data/perl/HeaderParseService/resources/database/university_list/ca.html +174 -0
  146. data/perl/HeaderParseService/resources/database/university_list/ch.html +52 -0
  147. data/perl/HeaderParseService/resources/database/university_list/cl.html +40 -0
  148. data/perl/HeaderParseService/resources/database/university_list/cn.html +87 -0
  149. data/perl/HeaderParseService/resources/database/university_list/co.html +39 -0
  150. data/perl/HeaderParseService/resources/database/university_list/cr.html +34 -0
  151. data/perl/HeaderParseService/resources/database/university_list/cy.html +34 -0
  152. data/perl/HeaderParseService/resources/database/university_list/cz.html +44 -0
  153. data/perl/HeaderParseService/resources/database/university_list/de.html +128 -0
  154. data/perl/HeaderParseService/resources/database/university_list/dean-mainlink.jpg +0 -0
  155. data/perl/HeaderParseService/resources/database/university_list/dk.html +42 -0
  156. data/perl/HeaderParseService/resources/database/university_list/ec.html +31 -0
  157. data/perl/HeaderParseService/resources/database/university_list/ee.html +30 -0
  158. data/perl/HeaderParseService/resources/database/university_list/eg.html +29 -0
  159. data/perl/HeaderParseService/resources/database/university_list/es.html +68 -0
  160. data/perl/HeaderParseService/resources/database/university_list/et.html +28 -0
  161. data/perl/HeaderParseService/resources/database/university_list/faq.html +147 -0
  162. data/perl/HeaderParseService/resources/database/university_list/fi.html +49 -0
  163. data/perl/HeaderParseService/resources/database/university_list/fj.html +28 -0
  164. data/perl/HeaderParseService/resources/database/university_list/fo.html +28 -0
  165. data/perl/HeaderParseService/resources/database/university_list/fr.html +106 -0
  166. data/perl/HeaderParseService/resources/database/university_list/geog.html +150 -0
  167. data/perl/HeaderParseService/resources/database/university_list/gr.html +38 -0
  168. data/perl/HeaderParseService/resources/database/university_list/gu.html +28 -0
  169. data/perl/HeaderParseService/resources/database/university_list/hk.html +34 -0
  170. data/perl/HeaderParseService/resources/database/university_list/hr.html +28 -0
  171. data/perl/HeaderParseService/resources/database/university_list/hu.html +46 -0
  172. data/perl/HeaderParseService/resources/database/university_list/id.html +29 -0
  173. data/perl/HeaderParseService/resources/database/university_list/ie.html +49 -0
  174. data/perl/HeaderParseService/resources/database/university_list/il.html +35 -0
  175. data/perl/HeaderParseService/resources/database/university_list/in.html +109 -0
  176. data/perl/HeaderParseService/resources/database/university_list/is.html +32 -0
  177. data/perl/HeaderParseService/resources/database/university_list/it.html +75 -0
  178. data/perl/HeaderParseService/resources/database/university_list/jm.html +28 -0
  179. data/perl/HeaderParseService/resources/database/university_list/jo.html +28 -0
  180. data/perl/HeaderParseService/resources/database/university_list/jp.html +155 -0
  181. data/perl/HeaderParseService/resources/database/university_list/kaplan.gif +0 -0
  182. data/perl/HeaderParseService/resources/database/university_list/kr.html +65 -0
  183. data/perl/HeaderParseService/resources/database/university_list/kw.html +28 -0
  184. data/perl/HeaderParseService/resources/database/university_list/lb.html +28 -0
  185. data/perl/HeaderParseService/resources/database/university_list/linkbw2.gif +0 -0
  186. data/perl/HeaderParseService/resources/database/university_list/lk.html +30 -0
  187. data/perl/HeaderParseService/resources/database/university_list/lt.html +31 -0
  188. data/perl/HeaderParseService/resources/database/university_list/lu.html +34 -0
  189. data/perl/HeaderParseService/resources/database/university_list/lv.html +30 -0
  190. data/perl/HeaderParseService/resources/database/university_list/ma.html +28 -0
  191. data/perl/HeaderParseService/resources/database/university_list/maczynski.gif +0 -0
  192. data/perl/HeaderParseService/resources/database/university_list/mirror.tar +0 -0
  193. data/perl/HeaderParseService/resources/database/university_list/mk.html +29 -0
  194. data/perl/HeaderParseService/resources/database/university_list/mo.html +29 -0
  195. data/perl/HeaderParseService/resources/database/university_list/mseawdm.gif +0 -0
  196. data/perl/HeaderParseService/resources/database/university_list/mt.html +28 -0
  197. data/perl/HeaderParseService/resources/database/university_list/mx.html +68 -0
  198. data/perl/HeaderParseService/resources/database/university_list/my.html +39 -0
  199. data/perl/HeaderParseService/resources/database/university_list/ni.html +28 -0
  200. data/perl/HeaderParseService/resources/database/university_list/nl.html +51 -0
  201. data/perl/HeaderParseService/resources/database/university_list/no.html +56 -0
  202. data/perl/HeaderParseService/resources/database/university_list/nz.html +41 -0
  203. data/perl/HeaderParseService/resources/database/university_list/pa.html +31 -0
  204. data/perl/HeaderParseService/resources/database/university_list/pe.html +40 -0
  205. data/perl/HeaderParseService/resources/database/university_list/ph.html +41 -0
  206. data/perl/HeaderParseService/resources/database/university_list/pl.html +51 -0
  207. data/perl/HeaderParseService/resources/database/university_list/pointcom.gif +0 -0
  208. data/perl/HeaderParseService/resources/database/university_list/pr.html +31 -0
  209. data/perl/HeaderParseService/resources/database/university_list/ps.html +28 -0
  210. data/perl/HeaderParseService/resources/database/university_list/pt.html +45 -0
  211. data/perl/HeaderParseService/resources/database/university_list/recognition.html +69 -0
  212. data/perl/HeaderParseService/resources/database/university_list/results.html +71 -0
  213. data/perl/HeaderParseService/resources/database/university_list/ro.html +38 -0
  214. data/perl/HeaderParseService/resources/database/university_list/ru.html +48 -0
  215. data/perl/HeaderParseService/resources/database/university_list/sd.html +28 -0
  216. data/perl/HeaderParseService/resources/database/university_list/se.html +57 -0
  217. data/perl/HeaderParseService/resources/database/university_list/sg.html +33 -0
  218. data/perl/HeaderParseService/resources/database/university_list/si.html +30 -0
  219. data/perl/HeaderParseService/resources/database/university_list/sk.html +35 -0
  220. data/perl/HeaderParseService/resources/database/university_list/th.html +45 -0
  221. data/perl/HeaderParseService/resources/database/university_list/tr.html +44 -0
  222. data/perl/HeaderParseService/resources/database/university_list/tw.html +76 -0
  223. data/perl/HeaderParseService/resources/database/university_list/ua.html +29 -0
  224. data/perl/HeaderParseService/resources/database/university_list/uk.html +168 -0
  225. data/perl/HeaderParseService/resources/database/university_list/univ-full.html +3166 -0
  226. data/perl/HeaderParseService/resources/database/university_list/univ.html +122 -0
  227. data/perl/HeaderParseService/resources/database/university_list/uy.html +31 -0
  228. data/perl/HeaderParseService/resources/database/university_list/ve.html +34 -0
  229. data/perl/HeaderParseService/resources/database/university_list/yu.html +28 -0
  230. data/perl/HeaderParseService/resources/database/university_list/za.html +46 -0
  231. data/perl/HeaderParseService/resources/database/university_list/zm.html +28 -0
  232. data/perl/HeaderParseService/resources/database/university_list.txt +3025 -0
  233. data/perl/HeaderParseService/resources/database/url.txt +1 -0
  234. data/perl/HeaderParseService/resources/database/webTopWords +225 -0
  235. data/perl/HeaderParseService/resources/database/words +45402 -0
  236. data/perl/HeaderParseService/resources/models/10ContextModelfold1 +369 -0
  237. data/perl/HeaderParseService/resources/models/10Modelfold1 +376 -0
  238. data/perl/HeaderParseService/resources/models/11ContextModelfold1 +400 -0
  239. data/perl/HeaderParseService/resources/models/11Modelfold1 +526 -0
  240. data/perl/HeaderParseService/resources/models/12ContextModelfold1 +510 -0
  241. data/perl/HeaderParseService/resources/models/12Modelfold1 +423 -0
  242. data/perl/HeaderParseService/resources/models/13ContextModelfold1 +364 -0
  243. data/perl/HeaderParseService/resources/models/13Modelfold1 +677 -0
  244. data/perl/HeaderParseService/resources/models/14ContextModelfold1 +459 -0
  245. data/perl/HeaderParseService/resources/models/14Modelfold1 +325 -0
  246. data/perl/HeaderParseService/resources/models/15ContextModelfold1 +340 -0
  247. data/perl/HeaderParseService/resources/models/15Modelfold1 +390 -0
  248. data/perl/HeaderParseService/resources/models/1ContextModelfold1 +668 -0
  249. data/perl/HeaderParseService/resources/models/1Modelfold1 +1147 -0
  250. data/perl/HeaderParseService/resources/models/2ContextModelfold1 +755 -0
  251. data/perl/HeaderParseService/resources/models/2Modelfold1 +796 -0
  252. data/perl/HeaderParseService/resources/models/3ContextModelfold1 +1299 -0
  253. data/perl/HeaderParseService/resources/models/3Modelfold1 +1360 -0
  254. data/perl/HeaderParseService/resources/models/4ContextModelfold1 +1062 -0
  255. data/perl/HeaderParseService/resources/models/4Modelfold1 +993 -0
  256. data/perl/HeaderParseService/resources/models/5ContextModelfold1 +1339 -0
  257. data/perl/HeaderParseService/resources/models/5Modelfold1 +2098 -0
  258. data/perl/HeaderParseService/resources/models/6ContextModelfold1 +888 -0
  259. data/perl/HeaderParseService/resources/models/6Modelfold1 +620 -0
  260. data/perl/HeaderParseService/resources/models/7ContextModelfold1 +257 -0
  261. data/perl/HeaderParseService/resources/models/7Modelfold1 +228 -0
  262. data/perl/HeaderParseService/resources/models/8ContextModelfold1 +677 -0
  263. data/perl/HeaderParseService/resources/models/8Modelfold1 +1871 -0
  264. data/perl/HeaderParseService/resources/models/9ContextModelfold1 +198 -0
  265. data/perl/HeaderParseService/resources/models/9Modelfold1 +170 -0
  266. data/perl/HeaderParseService/resources/models/NameSpaceModel +181 -0
  267. data/perl/HeaderParseService/resources/models/NameSpaceTrainF +347 -0
  268. data/perl/HeaderParseService/resources/models/WrapperBaseFeaDict +13460 -0
  269. data/perl/HeaderParseService/resources/models/WrapperContextFeaDict +14045 -0
  270. data/perl/HeaderParseService/resources/models/WrapperSpaceAuthorFeaDict +510 -0
  271. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test1 +23 -0
  272. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test10 +23 -0
  273. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test11 +23 -0
  274. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test12 +23 -0
  275. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test13 +23 -0
  276. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test14 +23 -0
  277. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test15 +23 -0
  278. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test2 +23 -0
  279. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test3 +23 -0
  280. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test4 +23 -0
  281. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test5 +23 -0
  282. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test6 +23 -0
  283. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test7 +23 -0
  284. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test8 +23 -0
  285. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test9 +23 -0
  286. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test1 +23 -0
  287. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test10 +23 -0
  288. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test11 +23 -0
  289. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test12 +23 -0
  290. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test13 +23 -0
  291. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test14 +23 -0
  292. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test15 +23 -0
  293. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test2 +23 -0
  294. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test3 +23 -0
  295. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test4 +23 -0
  296. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test5 +23 -0
  297. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test6 +23 -0
  298. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test7 +23 -0
  299. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test8 +23 -0
  300. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test9 +23 -0
  301. data/perl/ParsCit/README.TXT +82 -0
  302. data/perl/ParsCit/crfpp/traindata/parsCit.template +60 -0
  303. data/perl/ParsCit/crfpp/traindata/parsCit.train.data +12104 -0
  304. data/perl/ParsCit/crfpp/traindata/tagged_references.txt +500 -0
  305. data/perl/ParsCit/lib/CSXUtil/SafeText.pm +140 -0
  306. data/perl/ParsCit/lib/ParsCit/Citation.pm +462 -0
  307. data/perl/ParsCit/lib/ParsCit/CitationContext.pm +132 -0
  308. data/perl/ParsCit/lib/ParsCit/Config.pm +46 -0
  309. data/perl/ParsCit/lib/ParsCit/Controller.pm +306 -0
  310. data/perl/ParsCit/lib/ParsCit/PostProcess.pm +367 -0
  311. data/perl/ParsCit/lib/ParsCit/PreProcess.pm +333 -0
  312. data/perl/ParsCit/lib/ParsCit/Tr2crfpp.pm +331 -0
  313. data/perl/ParsCit/resources/parsCit.model +0 -0
  314. data/perl/ParsCit/resources/parsCitDict.txt +148783 -0
  315. data/perl/extract.pl +199 -0
  316. data/spec/biblicit/cb2bib_spec.rb +48 -0
  317. data/spec/biblicit/citeseer_spec.rb +40 -0
  318. data/spec/fixtures/pdf/10.1.1.109.4049.pdf +0 -0
  319. data/spec/fixtures/pdf/Bagnoli Watts TAR 2010.pdf +0 -0
  320. data/spec/fixtures/pdf/ICINCO_2010.pdf +0 -0
  321. data/spec/spec_helper.rb +3 -0
  322. metadata +474 -0
@@ -0,0 +1,31 @@
1
+ Protocols for Collecting Responses
2
+
3
+ in Multi-hop Radio Networks
4
+
5
+ Chungki Lee James E. Burns
6
+
7
+ Mostafa H. Ammar
8
+
9
+ GIT-CC-92/28
10
+
11
+ June 1992
12
+
13
+ Abstract
14
+
15
+ The problem of collecting responses in multi-hop radio networks is considered. A given node, called the source, is to collect a specified number of
16
+
17
+ responses from nodes in a radio network. The problem arises in several
18
+
19
+ applications of distributed systems. A deterministic and a randomized protocol for the problem are presented. The two protocols are analyzed and
20
+
21
+ their performance is compared. Conclusions are drawn about the suitability
22
+
23
+ of our protocols in various network environments.
24
+
25
+ College of Computing
26
+
27
+ Georgia Institute of Technology
28
+
29
+ Atlanta, Georgia 30332-0280
30
+
31
+
@@ -0,0 +1,31 @@
1
+ headerno(1) -- lineno(1):
2
+ chunk(1) -- class(1 <> content(Protocols for Collecting Responses
3
+ lineno(2):
4
+ chunk(1) -- class(1 <> content(in Multi-hop Radio Networks
5
+ lineno(3):
6
+ chunk(1) -- class(2 <> content(Chungki Lee
7
+ chunk(2) -- class(2 <> content(James E. Burns
8
+ lineno(4):
9
+ chunk(1) -- class(2 <> content(Mostafa H. Ammar
10
+ lineno(5):
11
+ chunk(1) -- class(14 <> content(GIT-CC-92/28
12
+ lineno(6):
13
+ chunk(1) -- class(7 <> content(June 1992
14
+ lineno(7):
15
+ chunk(1) -- class(8 <> content(Abstract
16
+ lineno(8):
17
+ chunk(1) -- class(8 <> content(The problem of collecting responses in multi-hop radio networks is considered . A given node , called the source , is to collect a specified number of
18
+ lineno(9):
19
+ chunk(1) -- class(8 <> content(responses from nodes in a radio network . The problem arises in several
20
+ lineno(10):
21
+ chunk(1) -- class(8 <> content(applications of distributed systems . A deterministic and a randomized protocol for the problem are presented . The two protocols are analyzed and
22
+ lineno(11):
23
+ chunk(1) -- class(8 <> content(their performance is compared . Conclusions are drawn about the suitability
24
+ lineno(12):
25
+ chunk(1) -- class(8 <> content(of our protocols in various network environments.
26
+ lineno(13):
27
+ chunk(1) -- class(3 <> content(College of Computing
28
+ lineno(14):
29
+ chunk(1) -- class(3 <> content(Georgia Institute of Technology
30
+ lineno(15):
31
+ chunk(1) -- class(4 <> content(Atlanta , Georgia 30332-0280
@@ -0,0 +1,60 @@
1
+ ALABAMA
2
+ MISSOURI
3
+ ALASKA
4
+ MONTANA
5
+ ARIZONA
6
+ NEBRASKA
7
+ ARKANSAS
8
+ NEVADA
9
+ CALIFORNIA
10
+ NEW HAMPSHIRE
11
+ COLORADO
12
+ NEW JERSEY
13
+ CONNECTICUT
14
+ NEW MEXICO
15
+ DELAWARE
16
+ NEW YORK
17
+ DIST OF COLUMBIA
18
+ NORTH
19
+ CAROLINA
20
+ FLORIDA
21
+ NORTH DAKOTA
22
+ GEORGIA
23
+ OHIO
24
+ HAWAII
25
+ OKLAHOMA
26
+ IDAHO
27
+ OREGON
28
+ ILLINOIS
29
+ PENNSYLVANIA
30
+ INDIANA
31
+ RHODE ISLAND
32
+ IOWA
33
+ SOUTH
34
+ CAROLINA
35
+ KANSAS
36
+ SOUTH DAKOTA
37
+ KENTUCKY
38
+ TENNESSEE
39
+ LOUISIANA
40
+ TEXAS
41
+ MAINE
42
+ UTAH
43
+ MARYLAND
44
+ VERMONT
45
+ MASSACHUSETTS
46
+ VIRGINIA
47
+ MICHIGAN
48
+ WASHINGTON
49
+ MINNESOTA
50
+ WEST VIRGINIA
51
+ MISSISSIPPI
52
+ WISCONSIN
53
+ WYOMING
54
+ AMERICAN SAMOA
55
+ NORTHERN MARIANA ISLANDS
56
+ FEDERATED STATES of
57
+ MICRONESIA
58
+ Puerto Rico
59
+ MARSHALL ISLANDS
60
+ VIRGIN ISLANDS
@@ -0,0 +1,17 @@
1
+ 25 Park
2
+ 21 Box
3
+ 15 Street
4
+ 14 St
5
+ 13 Avenue
6
+ 8 Hill
7
+ 8 Road
8
+ 8 Way
9
+ 8 Hall
10
+ 7 Suite
11
+ 7 Square
12
+ 6 Center
13
+ 5 City
14
+ 4 Campus
15
+ 3 Building
16
+ 3 Ave
17
+ 3 Address
@@ -0,0 +1,35 @@
1
+ 325 University
2
+ 111 Univ
3
+ 221 Department
4
+ 77 Institute
5
+ 47 Research
6
+ 39 Sciences
7
+ 37 Laboratory
8
+ 34 Technology
9
+ 33 Dept
10
+ 27 Systems
11
+ 26 School
12
+ 26 Center
13
+ 18 Division
14
+ 13 College
15
+ 13 Group
16
+ 11 Dipartimento
17
+ 11 Studies
18
+ 11 Universita
19
+ 10 INSTITUTE
20
+ 8 Inc
21
+ 8 Media
22
+ 8 Institut
23
+ 8 Universitat
24
+ 7 UNIVERSITY
25
+ 5 Informatik
26
+ 5 TECHNOLOGY
27
+ 5 Lab
28
+ 4 Universitat-GH
29
+ 4 Corporation
30
+ 3 Labs
31
+ 3 Laboratories
32
+ 3 LABORATORY
33
+ 3 Elettronica
34
+ 3 DEPARTMENT
35
+ 2 Universit
@@ -0,0 +1,533 @@
1
+ 504 of
2
+ 325 University
3
+ 270 Computer
4
+ 232 Science
5
+ 221 Department
6
+ 111 and
7
+ 77 Institute
8
+ 55 Engineering
9
+ 47 Research
10
+ 46 for
11
+ 43 The
12
+ 41 California
13
+ 39 Sciences
14
+ 37 Laboratory
15
+ 34 Technology
16
+ 33 Dept
17
+ 32 at
18
+ 30 Information
19
+ 28 Massachusetts
20
+ 28 di
21
+ 27 Systems
22
+ 26 School
23
+ 26 Center
24
+ 24 Electrical
25
+ 24 Berkeley
26
+ 20 Maryland
27
+ 18 Division
28
+ 17 &
29
+ 17 Washington
30
+ 16 Computing
31
+ 16 Mathematics
32
+ 15 State
33
+ 13 Intelligence
34
+ 13 Advanced
35
+ 13 Artificial
36
+ 13 Carnegie
37
+ 13 College
38
+ 13 Mellon
39
+ 13 Group
40
+ 12 Robotics
41
+ 12 OF
42
+ 11 Michigan
43
+ 11 Dipartimento
44
+ 11 Studies
45
+ 11 Universita
46
+ 11 Texas
47
+ 10 Intelligent
48
+ 10 Illinois
49
+ 10 INSTITUTE
50
+ 10 MIT
51
+ 9 Informatica
52
+ 8 Urbana-Champaign
53
+ 8 Inc
54
+ 8 Media
55
+ 8 Institut
56
+ 8 Universitat
57
+ 8 Austin
58
+ 8 Ohio
59
+ 8 National
60
+ 8 EECS
61
+ 7 Hebrew
62
+ 7 fur
63
+ 7 UNIVERSITY
64
+ 7 de
65
+ 7 Stanford
66
+ 7 Columbia
67
+ 6 Microsoft
68
+ 6 Applied
69
+ 6 Georgia
70
+ 6 Computational
71
+ 6 Colorado
72
+ 6 Oregon
73
+ 6 Graduate
74
+ 6 Birmingham
75
+ 6 Florida
76
+ 5 Rutgers
77
+ 5 Carolina
78
+ 5 Informatik
79
+ 5 TECHNOLOGY
80
+ 5 Lab
81
+ 5 Dartmouth
82
+ 5 Paderborn
83
+ 5 UCLA
84
+ 5 MASSACHUSETTS
85
+ 5 COMPUTER
86
+ 5 SCIENCE
87
+ 5 Toronto
88
+ 5 Southern
89
+ 5 Mathematical
90
+ 5 North
91
+ 5 Vision
92
+ 4 Karlsruhe
93
+ 4 Nijmegen
94
+ 4 Universitat-GH
95
+ 4 Hill
96
+ 4 UC
97
+ 4 Wisconsin-Madison
98
+ 4 Bonn
99
+ 4 INTERNATIONAL
100
+ 4 Wisconsin
101
+ 4 Machines
102
+ 4 Indiana
103
+ 4 International
104
+ 4 Corporation
105
+ 4 Rochester
106
+ 3 Aviv
107
+ 3 Perceptual
108
+ 3 Pisa
109
+ 3 Physics
110
+ 3 Changsha
111
+ 3 Labs
112
+ 3 Software
113
+ 3 Council
114
+ 3 Sandia
115
+ 3 Biostatistics
116
+ 3 Informatics
117
+ 3 Hughes
118
+ 3 Tel
119
+ 3 Silicon
120
+ 3 Chapel
121
+ 3 Psychology
122
+ 3 Statistics
123
+ 3 Laboratories
124
+ 3 Utah
125
+ 3 Cornell
126
+ 3 New
127
+ 3 CS
128
+ 3 Centro
129
+ 3 -
130
+ 3 Knowledge
131
+ 3 IBM
132
+ 3 LABORATORY
133
+ 3 AT&T
134
+ 3 Elettronica
135
+ 3 DEPARTMENT
136
+ 3 Graphics
137
+ 3 Electronics
138
+ 3 AND
139
+ 3 Eng
140
+ 3 Rice
141
+ 3 ECE
142
+ 3 Utrecht
143
+ 3 Technical
144
+ 3 Yale
145
+ 3 CALIFORNIA
146
+ 2 Heinz
147
+ 2 Caltech
148
+ 2 Syracuse
149
+ 2 Computation
150
+ 2 Berlin
151
+ 2 Wales
152
+ 2 ARTIFICIAL
153
+ 2 Jerusalem
154
+ 2 Bell
155
+ 2 BIOLOGICAL
156
+ 2 Manufacturing
157
+ 2 Sistemistica
158
+ 2 Parallel
159
+ 2 York
160
+ 2 Polytechnic
161
+ 2 Technische
162
+ 2 Virginia
163
+ 2 (DIAG)
164
+ 2 INTELLIGENCE
165
+ 2 Architectures
166
+ 2 Bologna
167
+ 2 Arizona
168
+ 2 Milano
169
+ 2 Distributed
170
+ 2 Open
171
+ 2 Hawaii
172
+ 2 Pacific
173
+ 2 ?
174
+ 2 Universit
175
+ 2 Oxford
176
+ 2 Amherst
177
+ 2 INFORMATION
178
+ 2 Stefan
179
+ 2 /
180
+ 2 SCIENCES
181
+ 2 Maastricht
182
+ 2 NASA
183
+ 2 Watson
184
+ 2 Sci
185
+ 2 MADISON
186
+ 2 EE
187
+ 2 Univ
188
+ 2 Brigham
189
+ 2 Agents
190
+ 2 Cognitive
191
+ 2 Harvard
192
+ 2 FR-35
193
+ 2 Development
194
+ 2 Central
195
+ 2 Torino
196
+ 2 Deakin
197
+ 2 Principles
198
+ 2 WISCONSIN
199
+ 2 Canada
200
+ 2 South
201
+ 2 CENTER
202
+ 2 Mechanical
203
+ 2 Nixdorf
204
+ 2 Amsterdam
205
+ 2 Inst
206
+ 2 Waikato
207
+ 2 Biological
208
+ 2 Network
209
+ 2 NEC
210
+ 2 Manchester
211
+ 2 Universiteit
212
+ 2 Minnesota
213
+ 2 Logic
214
+ 2 Scienze
215
+ 2 Section
216
+ 2 Johns
217
+ 2 Hopkins
218
+ 2 Young
219
+ 2 UCSD
220
+ 2 Retrieval
221
+ 2 Salk
222
+ 2 Real-Time
223
+ 2 Ames
224
+ 2 Technologies
225
+ 2 FOR
226
+ 2 217-50
227
+ 2 Politecnico
228
+ 2 Purdue
229
+ 1 Nimble
230
+ 1 European
231
+ 1 LEARNING
232
+ 1 Mobile
233
+ 1 NSW
234
+ 1 (IIIA)
235
+ 1 FB
236
+ 1 Kommunikationsforschung
237
+ 1 Palo
238
+ 1 (i)
239
+ 1 Philips
240
+ 1 Clinic
241
+ 1 Conseil
242
+ 1 (RSA
243
+ 1 Neurobiology
244
+ 1 -Systems
245
+ 1 McGill
246
+ 1 Aviation
247
+ 1 technologie
248
+ 1 Supercomputer
249
+ 1 Tohoku
250
+ 1 Friedrich-Schiller-Universitat
251
+ 1 BRAIN
252
+ 1 Departmento
253
+ 1 Lutheran
254
+ 1 CWI
255
+ 1 Walter
256
+ 1 Departments
257
+ 1 Lawrence
258
+ 1 Gilt
259
+ 1 Experimental
260
+ 1 ed
261
+ 1 Howard
262
+ 1 Corp
263
+ 1 GmbH
264
+ 1 (ISR)
265
+ 1 (SWI)
266
+ 1 (CSIC)
267
+ 1 flfl
268
+ 1 Italy
269
+ 1 Linguistics
270
+ 1 uZrich
271
+ 1 Unversity
272
+ 1 Algorithms
273
+ 1 Kepler
274
+ 1 Eindhoven
275
+ 1 Fachbereich
276
+ 1 CREST
277
+ 1 Linz
278
+ 1 Scienza
279
+ 1 Barbara
280
+ 1 Strategies
281
+ 1 Artifical
282
+ 1 CSIRO
283
+ 1 Salerno
284
+ 1 Beckman
285
+ 1 Astronomy
286
+ 1 Singapore
287
+ 1 INSEE
288
+ 1 Sun
289
+ 1 Cape
290
+ 1 CARNEGIE
291
+ 1 Haas
292
+ 1 Santa
293
+ 1 Design
294
+ 1 Scientific
295
+ 1 theoretische
296
+ 1 34610
297
+ 1 national
298
+ 1 *
299
+ 1 Dynamics)
300
+ 1 MELLON
301
+ 1 Unit
302
+ 1 COGNITIVE
303
+ 1 COAST
304
+ 1 RICE
305
+ 1 II"
306
+ 1 sd&m
307
+ 1 Control
308
+ 1 Berne
309
+ 1 Social
310
+ 1 Aarhus
311
+ 1 Business
312
+ 1 Physical
313
+ 1 ENGINEERING
314
+ 1 COMPUTATIONAL
315
+ 1 Object
316
+ 1 Argonne
317
+ 1 Integration
318
+ 1 CMRI
319
+ 1 und
320
+ 1 III
321
+ 1 Complex
322
+ 1 Robot
323
+ 1 Intelligence"
324
+ 1 KG
325
+ 1 Linear
326
+ 1 WNI
327
+ 1 Genova
328
+ 1 Sistemi
329
+ 1 Fraser
330
+ 1 Math
331
+ 1 Bogazi~ci
332
+ 1 Cadence
333
+ 1 Catania
334
+ 1 Xerox
335
+ 1 Appl
336
+ 1 Svizzero
337
+ 1 Propulsion
338
+ 1 Elec
339
+ 1 Boston
340
+ 1 Park
341
+ 1 Corporate
342
+ 1 Libre
343
+ 1 Programming
344
+ 1 Trier
345
+ 1 Oklahoma
346
+ 1 Optimization
347
+ 1 Dundee
348
+ 1 Branch
349
+ 1 Info
350
+ 1 RESEARCH
351
+ 1 Limited
352
+ 1 Networks
353
+ 1 COINS
354
+ 1 Spanish
355
+ 1 General
356
+ 1 Durham
357
+ 1 SYNERGY
358
+ 1 Freiburg
359
+ 1 (CSCS/SCSC)
360
+ 1 Telecom
361
+ 1 Jet
362
+ 1 Lederle
363
+ 1 A&T
364
+ 1 dei
365
+ 1 Masschusetts
366
+ 1 department
367
+ 1 (CoSCo)
368
+ 1 Depatrment
369
+ 1 Odense
370
+ 1 Laboratoire
371
+ 1 del
372
+ 1 Microsystems
373
+ 1 ILK
374
+ 1 Predictive
375
+ 1 Articifial
376
+ 1 Calcolo
377
+ 1 PROCESSING
378
+ 1 Slovak
379
+ 1 Bruxelles
380
+ 1 Sussex
381
+ 1 Wisconsin-Milwaukee
382
+ 1 Adaptive
383
+ 1 CNR
384
+ 1 Matematica
385
+ 1 Industrial
386
+ 1 (**)
387
+ 1 flflfl
388
+ 1 Cleveland
389
+ 1 Fernuniversitat-GH
390
+ 1 Napoli
391
+ 1 Universidad
392
+ 1 Materials
393
+ 1 Austrian
394
+ 1 Jena
395
+ 1 BERKELEY
396
+ 1 Rensselaer
397
+ 1 Cognitiva
398
+ 1 Linkoping
399
+ 1 Human-Computer
400
+ 1 COLLEGE
401
+ 1 Memphis
402
+ 1 Faculty
403
+ 1 Box
404
+ 1 IDSIA
405
+ 1 Electric
406
+ 1 ARCUS
407
+ 1 Wurzburg
408
+ 1 IRIDIA
409
+ 1 Health
410
+ 1 DAIMI
411
+ 1 Concordia
412
+ 1 Australian
413
+ 1 Army
414
+ 1 (LUC)
415
+ 1 Recom
416
+ 1 Supercomputing
417
+ 1 Op
418
+ 1 Swiss
419
+ 1 "Federico
420
+ 1 Medical
421
+ 1 Jordanstown
422
+ 1 Courant
423
+ 1 Northeast
424
+ 1 Calgary
425
+ 1 Iowa
426
+ 1 430
427
+ 1 Administration
428
+ 1 Aires
429
+ 1 Informazione
430
+ 1 DIMACS
431
+ 1 laboratory
432
+ 1 Simon
433
+ 1 Tilburg
434
+ 1 Buenos
435
+ 1 Teoria
436
+ 1 Federal
437
+ 1 AT
438
+ 1 c/o
439
+ 1 Real
440
+ 1 on
441
+ 1 Limburg
442
+ 1 Copenhagen
443
+ 1 Management
444
+ 1 Stirling
445
+ 1 Statistique
446
+ 1 ELECTRICAL
447
+ 1 Mayo
448
+ 1 Johannes
449
+ 1 Ind
450
+ 1 Learning
451
+ 1 Digital
452
+ 1 Also
453
+ 1 Hong
454
+ 1 Fakultat
455
+ 1 Fordham
456
+ 1 Standards
457
+ 1 Generali
458
+ 1 Mathematik-Informatik
459
+ 1 Neural
460
+ 1 Applicazioni
461
+ 1 information
462
+ 1 Campus
463
+ 1 Mining
464
+ 1 Munich
465
+ 1 Kong
466
+ 1 CB
467
+ 1 Hagen
468
+ 1 Alto
469
+ 1 STATISTICS
470
+ 1 Project
471
+ 1 (*)
472
+ 1 Combinatorics
473
+ 1 Theory
474
+ 1 Computacion
475
+ 1 Busch
476
+ 1 Physik
477
+ 1 Securities
478
+ 1 Chinese
479
+ 1 Kentucky
480
+ 1 linformation
481
+ 1 Mathematik
482
+ 1 Waterloo
483
+ 1 Town
484
+ 1 Temple
485
+ 1 Geometric
486
+ 1 Ulster
487
+ 1 Security
488
+ 1 Irvine
489
+ 1 Co
490
+ 1 ELECTRONICS
491
+ 1 with
492
+ 1 Visualization
493
+ 1 Public
494
+ 1 Siemens
495
+ 1 Metallurgical
496
+ 1 Phonetik
497
+ 1 recherches
498
+ 1 L'Aquila
499
+ 1 (ii)
500
+ 1 "Methods
501
+ 1 Ulm
502
+ 1 Maine
503
+ 1 Data
504
+ 1 dellInformazione
505
+ 1 Labs|Research
506
+ 1 DEC/SRC
507
+ 1 Univeristy
508
+ 1 SOUTHERN
509
+ 1 Vanderbilt
510
+ 1 Interaction
511
+ 1 WHITAKER
512
+ 1 Accelerator
513
+ 1 Connecticut
514
+ 1 UMIACS
515
+ 1 Weizmann
516
+ 1 Time
517
+ 1 Pavia
518
+ 1 Princeton
519
+ 1 dell'Informazione
520
+ 1 EA
521
+ 1 Bellcore
522
+ 1 Oriented
523
+ 1 Caelum
524
+ 1 Scientifico
525
+ 1 Equipment
526
+ 1 ALPHA
527
+ 1 Toledo
528
+ 1 Worcester
529
+ 1 Chicago
530
+ 1 Duke
531
+ 1 Pennsylvania
532
+ 1 Jozef
533
+ 1 Engr