biblicit 2.1.0 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (246)
  1. data/biblicit.gemspec +0 -1
  2. data/lib/biblicit/extractor.rb +2 -7
  3. data/lib/biblicit/parscit.rb +18 -6
  4. data/lib/biblicit/version.rb +1 -1
  5. data/parscit/bin/citeExtract.pl +16 -4
  6. data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/AssembleXMLMetadata.pm +0 -0
  7. data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/Function.pm +0 -0
  8. data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/LoadInformation.pm +0 -0
  9. data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/MultiClassChunking.pm +0 -0
  10. data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/NamePatternMatch.pm +0 -0
  11. data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/Parser.pm +21 -0
  12. data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/ParserMethods.pm +0 -0
  13. data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/Config/API_Config.pm +11 -10
  14. data/{svm-header-parse/HeaderParseService → parscit/lib/HeaderParse}/README.TXT +0 -0
  15. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/50states +0 -0
  16. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/AddrTopWords.txt +0 -0
  17. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/AffiTopWords.txt +0 -0
  18. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/AffiTopWordsAll.txt +0 -0
  19. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/ChineseSurNames.txt +0 -0
  20. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/Csurnames.bin +0 -0
  21. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/Csurnames_spec.bin +0 -0
  22. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/DomainSuffixes.txt +0 -0
  23. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/LabeledHeader +0 -0
  24. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/README +0 -0
  25. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/TrainMulClassLines +0 -0
  26. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/TrainMulClassLines1 +0 -0
  27. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/abstract.txt +0 -0
  28. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/abstractTopWords +0 -0
  29. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/addr.txt +0 -0
  30. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/affi.txt +0 -0
  31. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/affis.bin +0 -0
  32. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/all_namewords_spec.bin +0 -0
  33. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/allnamewords.bin +0 -0
  34. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/cities_US.txt +0 -0
  35. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/cities_world.txt +0 -0
  36. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/city.txt +0 -0
  37. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/cityname.txt +0 -0
  38. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/country_abbr.txt +0 -0
  39. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/countryname.txt +0 -0
  40. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/dateTopWords +0 -0
  41. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/degree.txt +0 -0
  42. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/email.txt +0 -0
  43. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/excludeWords.txt +0 -0
  44. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/female-names +0 -0
  45. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/firstNames.txt +0 -0
  46. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/firstnames.bin +0 -0
  47. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/firstnames_spec.bin +0 -0
  48. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/intro.txt +0 -0
  49. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/keyword.txt +0 -0
  50. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/keywordTopWords +0 -0
  51. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/male-names +0 -0
  52. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/middleNames.txt +0 -0
  53. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/month.txt +0 -0
  54. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/mul +0 -0
  55. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/mul.label +0 -0
  56. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/mul.label.old +0 -0
  57. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/mul.processed +0 -0
  58. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/mulAuthor +0 -0
  59. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/mulClassStat +0 -0
  60. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/nickname.txt +0 -0
  61. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/nicknames.bin +0 -0
  62. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/note.txt +0 -0
  63. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/page.txt +0 -0
  64. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/phone.txt +0 -0
  65. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/postcode.txt +0 -0
  66. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/pubnum.txt +0 -0
  67. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/statename.bin +0 -0
  68. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/statename.txt +0 -0
  69. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/states_and_abbreviations.txt +0 -0
  70. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/stopwords +0 -0
  71. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/stopwords.bin +0 -0
  72. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/surNames.txt +0 -0
  73. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/surnames.bin +0 -0
  74. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/surnames_spec.bin +0 -0
  75. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list.txt +0 -0
  76. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/A.html +0 -0
  77. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/B.html +0 -0
  78. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/C.html +0 -0
  79. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/D.html +0 -0
  80. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/E.html +0 -0
  81. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/F.html +0 -0
  82. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/G.html +0 -0
  83. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/H.html +0 -0
  84. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/I.html +0 -0
  85. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/J.html +0 -0
  86. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/K.html +0 -0
  87. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/L.html +0 -0
  88. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/M.html +0 -0
  89. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/N.html +0 -0
  90. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/O.html +0 -0
  91. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/P.html +0 -0
  92. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/Q.html +0 -0
  93. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/R.html +0 -0
  94. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/S.html +0 -0
  95. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/T.html +0 -0
  96. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/U.html +0 -0
  97. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/V.html +0 -0
  98. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/W.html +0 -0
  99. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/WCSelect.gif +0 -0
  100. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/X.html +0 -0
  101. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/Y.html +0 -0
  102. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/Z.html +0 -0
  103. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ae.html +0 -0
  104. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/am.html +0 -0
  105. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ar.html +0 -0
  106. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/at.html +0 -0
  107. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/au.html +0 -0
  108. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/bd.html +0 -0
  109. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/be.html +0 -0
  110. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/bg.html +0 -0
  111. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/bh.html +0 -0
  112. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/blueribbon.gif +0 -0
  113. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/bm.html +0 -0
  114. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/bn.html +0 -0
  115. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/br.html +0 -0
  116. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ca.html +0 -0
  117. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ch.html +0 -0
  118. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/cl.html +0 -0
  119. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/cn.html +0 -0
  120. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/co.html +0 -0
  121. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/cr.html +0 -0
  122. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/cy.html +0 -0
  123. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/cz.html +0 -0
  124. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/de.html +0 -0
  125. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/dean-mainlink.jpg +0 -0
  126. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/dk.html +0 -0
  127. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ec.html +0 -0
  128. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ee.html +0 -0
  129. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/eg.html +0 -0
  130. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/es.html +0 -0
  131. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/et.html +0 -0
  132. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/faq.html +0 -0
  133. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/fi.html +0 -0
  134. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/fj.html +0 -0
  135. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/fo.html +0 -0
  136. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/fr.html +0 -0
  137. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/geog.html +0 -0
  138. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/gr.html +0 -0
  139. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/gu.html +0 -0
  140. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/hk.html +0 -0
  141. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/hr.html +0 -0
  142. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/hu.html +0 -0
  143. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/id.html +0 -0
  144. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ie.html +0 -0
  145. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/il.html +0 -0
  146. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/in.html +0 -0
  147. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/is.html +0 -0
  148. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/it.html +0 -0
  149. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/jm.html +0 -0
  150. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/jo.html +0 -0
  151. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/jp.html +0 -0
  152. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/kaplan.gif +0 -0
  153. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/kr.html +0 -0
  154. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/kw.html +0 -0
  155. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/lb.html +0 -0
  156. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/linkbw2.gif +0 -0
  157. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/lk.html +0 -0
  158. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/lt.html +0 -0
  159. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/lu.html +0 -0
  160. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/lv.html +0 -0
  161. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ma.html +0 -0
  162. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/maczynski.gif +0 -0
  163. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/mirror.tar +0 -0
  164. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/mk.html +0 -0
  165. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/mo.html +0 -0
  166. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/mseawdm.gif +0 -0
  167. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/mt.html +0 -0
  168. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/mx.html +0 -0
  169. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/my.html +0 -0
  170. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ni.html +0 -0
  171. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/nl.html +0 -0
  172. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/no.html +0 -0
  173. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/nz.html +0 -0
  174. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/pa.html +0 -0
  175. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/pe.html +0 -0
  176. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ph.html +0 -0
  177. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/pl.html +0 -0
  178. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/pointcom.gif +0 -0
  179. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/pr.html +0 -0
  180. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ps.html +0 -0
  181. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/pt.html +0 -0
  182. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/recognition.html +0 -0
  183. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/results.html +0 -0
  184. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ro.html +0 -0
  185. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ru.html +0 -0
  186. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/sd.html +0 -0
  187. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/se.html +0 -0
  188. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/sg.html +0 -0
  189. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/si.html +0 -0
  190. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/sk.html +0 -0
  191. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/th.html +0 -0
  192. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/tr.html +0 -0
  193. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/tw.html +0 -0
  194. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ua.html +0 -0
  195. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/uk.html +0 -0
  196. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/univ-full.html +0 -0
  197. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/univ.html +0 -0
  198. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/uy.html +0 -0
  199. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ve.html +0 -0
  200. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/yu.html +0 -0
  201. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/za.html +0 -0
  202. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/zm.html +0 -0
  203. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/url.txt +0 -0
  204. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/webTopWords +0 -0
  205. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/words +0 -0
  206. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/10ContextModelfold1 +0 -0
  207. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/10Modelfold1 +0 -0
  208. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/11ContextModelfold1 +0 -0
  209. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/11Modelfold1 +0 -0
  210. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/12ContextModelfold1 +0 -0
  211. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/12Modelfold1 +0 -0
  212. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/13ContextModelfold1 +0 -0
  213. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/13Modelfold1 +0 -0
  214. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/14ContextModelfold1 +0 -0
  215. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/14Modelfold1 +0 -0
  216. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/15ContextModelfold1 +0 -0
  217. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/15Modelfold1 +0 -0
  218. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/1ContextModelfold1 +0 -0
  219. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/1Modelfold1 +0 -0
  220. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/2ContextModelfold1 +0 -0
  221. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/2Modelfold1 +0 -0
  222. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/3ContextModelfold1 +0 -0
  223. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/3Modelfold1 +0 -0
  224. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/4ContextModelfold1 +0 -0
  225. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/4Modelfold1 +0 -0
  226. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/5ContextModelfold1 +0 -0
  227. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/5Modelfold1 +0 -0
  228. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/6ContextModelfold1 +0 -0
  229. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/6Modelfold1 +0 -0
  230. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/7ContextModelfold1 +0 -0
  231. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/7Modelfold1 +0 -0
  232. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/8ContextModelfold1 +0 -0
  233. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/8Modelfold1 +0 -0
  234. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/9ContextModelfold1 +0 -0
  235. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/9Modelfold1 +0 -0
  236. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/NameSpaceModel +0 -0
  237. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/NameSpaceTrainF +0 -0
  238. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/WrapperBaseFeaDict +0 -0
  239. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/WrapperContextFeaDict +0 -0
  240. data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/WrapperSpaceAuthorFeaDict +0 -0
  241. data/sh/convert_to_text.sh +2 -1
  242. metadata +267 -282
  243. data/lib/biblicit/citeseer.rb +0 -42
  244. data/svm-header-parse/HeaderParseService/lib/CSXUtil/SafeText.pm +0 -140
  245. data/svm-header-parse/HeaderParseService/tmp/.gitignore +0 -4
  246. data/svm-header-parse/extract.pl +0 -75
data/biblicit.gemspec CHANGED
@@ -26,7 +26,6 @@ Gem::Specification.new do |gem|
26
26
  gem.add_development_dependency 'rake'
27
27
  gem.add_development_dependency 'rspec'
28
28
  gem.add_development_dependency 'pry'
29
- gem.add_development_dependency 'pry-debugger'
30
29
 
31
30
  gem.requirements << 'For PDFs, Poppler or XPDF (try "which pdftotext")'
32
31
  gem.requirements << 'For Postscript files, Ghostscript (try "which ps2ascii")'
data/lib/biblicit/extractor.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  # encoding: UTF-8
2
2
 
3
3
  require 'biblicit/cb2bib'
4
- require 'biblicit/citeseer'
5
4
  require 'biblicit/parscit'
6
5
 
7
6
  require 'tempfile'
@@ -30,7 +29,7 @@ module Biblicit
30
29
 
31
30
  def self.extract_from_file(file, opts)
32
31
  file = File.realpath(file)
33
- tools = opts.delete(:tools) || [:parshed, :citeseer]
32
+ tools = opts.delete(:tools) || [:parshed]
34
33
 
35
34
  result = {}
36
35
 
@@ -38,11 +37,7 @@ module Biblicit
38
37
  `#{SH_DIR}/convert_to_text.sh #{file.shellescape} #{in_txt.path}`
39
38
 
40
39
  if tools.include?(:parshed)
41
- result.merge!( parshed: ParsCit.extract(in_txt, opts) )
42
- end
43
-
44
- if tools.include?(:citeseer)
45
- result.merge!( citeseer: CiteSeer.extract(in_txt, opts) )
40
+ result.merge!( ParsCit.extract(in_txt, opts) )
46
41
  end
47
42
 
48
43
  if tools.include?(:cb2bib)
data/lib/biblicit/parscit.rb CHANGED
@@ -17,20 +17,32 @@ module ParsCit
17
17
  attr_reader :result
18
18
 
19
19
  def initialize(in_txt, opts={})
20
+ mode = (opts.fetch :include_citations, false) ? 'extract_all' : 'extract_header'
20
21
  ENV['CRFPP_HOME'] ||= "#{File.dirname(`which crf_test`)}/../"
21
- output = `#{PERL_DIR}/bin/citeExtract.pl -q -m extract_all #{in_txt.path}`
22
+ output = `#{PERL_DIR}/bin/citeExtract.pl -q -m #{mode} #{in_txt.path}`
22
23
  @result = parse(Nokogiri::XML output)
23
24
  end
24
25
 
25
26
  private
26
27
 
27
28
  def parse(xml)
28
- parsed = xml.css("algorithm[name=ParsHed]")
29
+ result = {}
30
+
31
+ parshed = xml.css("algorithm[name=ParsHed]")
32
+ result[:parshed] = {
33
+ title: parshed.css('title').text.gsub(/\s+/,' ').strip,
34
+ authors: parshed.css('author').map { |a| a.text.gsub(/\s+/,' ').strip },
35
+ abstract: parshed.css('abstract').text#,
36
+ #confidence: parshed.css('title').attr('confidence').value.to_f
37
+ }
29
38
 
30
- result = {
31
- title: parsed.css('title').text.gsub(/\s+/,' ').strip,
32
- authors: parsed.css('author').map { |a| a.text.gsub(/\s+/,' ').strip },
33
- abstract: parsed.css('abstract').text
39
+ svm = xml.css('algorithm[name="SVM HeaderParse"]')
40
+ result[:citeseer] = {
41
+ title: svm.css('title').text,
42
+ authors: svm.css('author > name').map { |n| n.text.strip }.reject(&:blank?).uniq,
43
+ author_emails: svm.css('author > email').map { |n| n.text.strip }.reject(&:blank?).uniq,
44
+ abstract: svm.css('abstract').text,
45
+ valid: svm.css('validHeader').first.try(:text) == '1'
34
46
  }
35
47
 
36
48
  citations = xml.css('algorithm[name=ParsCit] > citationList > citation').map do |node|
data/lib/biblicit/version.rb CHANGED
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Biblicit
4
4
 
5
- VERSION = '2.1.0'
5
+ VERSION = '2.2.0'
6
6
 
7
7
  end
data/parscit/bin/citeExtract.pl CHANGED
@@ -33,8 +33,8 @@ use Getopt::Std;
33
33
  use strict 'vars';
34
34
  use lib $FindBin::Bin . "/../lib";
35
35
 
36
- use lib "/home/wing.nus/tools/languages/programming/perl-5.10.0/lib/5.10.0";
37
- use lib "/home/wing.nus/tools/languages/programming/perl-5.10.0/lib/site_perl/5.10.0";
36
+ #use lib "/home/wing.nus/tools/languages/programming/perl-5.10.0/lib/5.10.0";
37
+ #use lib "/home/wing.nus/tools/languages/programming/perl-5.10.0/lib/site_perl/5.10.0";
38
38
 
39
39
  # Dependencies
40
40
  use File::Spec;
@@ -42,6 +42,9 @@ use File::Basename;
42
42
 
43
43
  # Local libraries
44
44
  use ParsCit::Controller;
45
+
46
+ use HeaderParse::API::Parser;
47
+ use HeaderParse::Config::API_Config;
45
48
 
46
49
  # USER customizable section
47
50
  my $tmpfile .= $0;
@@ -58,6 +61,7 @@ my $progname = $1;
58
61
  my $PARSCIT = 1;
59
62
  my $PARSHED = 2;
60
63
  my $SECTLABEL = 4; # Thang v100401
64
+ my $SVM = 8;
61
65
 
62
66
  my $default_input_type = "raw";
63
67
  my $output_version = "110505";
@@ -228,6 +232,14 @@ if (($mode & $PARSCIT) == $PARSCIT)
228
232
  if (scalar(@export_types) != 0) { BiblioScript(\@export_types, $$pc_xml, $out); }
229
233
  }
230
234
 
235
+ # SVM HEADER PARSE
236
+ if (($mode & $SVM) == $SVM)
237
+ {
238
+ my $svm_xml = HeaderParse::API::Parser::extractHeader($text_file);
239
+
240
+ $rxml .= $$$svm_xml . "\n";
241
+ }
242
+
231
243
  $rxml .= "</algorithms>";
232
244
 
233
245
  if (defined $out)
@@ -267,7 +279,7 @@ sub ParseMode
267
279
  }
268
280
  elsif ($arg eq "extract_header")
269
281
  {
270
- return $PARSHED;
282
+ return ($PARSHED | $SVM);
271
283
  }
272
284
  elsif ($arg eq "extract_citations")
273
285
  {
@@ -279,7 +291,7 @@ sub ParseMode
279
291
  }
280
292
  elsif ($arg eq "extract_all")
281
293
  {
282
- return ($PARSHED | $PARSCIT | $SECTLABEL);
294
+ return ($PARSHED | $PARSCIT | $SECTLABEL | $SVM);
283
295
  }
284
296
  else
285
297
  {
data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/Parser.pm CHANGED
@@ -20,6 +20,27 @@ use HeaderParse::Config::API_Config;
20
20
  use vars qw($ServerURL $repositoryLocation $algVersion);
21
21
 
22
22
 
23
+ sub extractHeader {
24
+ my ($textFile) = @_;
25
+
26
+ my $jobID;
27
+ while($jobID = rand(time)) {
28
+ unless(-f $offlineD."$jobID") {
29
+ last;
30
+ }
31
+ }
32
+
33
+ my ($status, $msg, $rXML) = _parseHeader($textFile, $jobID);
34
+
35
+ if ($status <= 0) {
36
+ my $error = "Error: $msg";
37
+ return \$error;
38
+ }
39
+ else {
40
+ return \$rXML;
41
+ }
42
+ }
43
+
23
44
  sub _parseHeader{
24
45
  my ($fileID, $jobID) = @_;
25
46
  my ($header, $faultMessage, $rResponse, $success, $papertext);
data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/Config/API_Config.pm CHANGED
@@ -19,12 +19,6 @@ use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
19
19
  @ISA = qw(Exporter);
20
20
  @EXPORT = qw($Classifier $offlineD $Database_Dir $Data_Dir $Tmp_Dir $nMinHeaderLength $nMaxHeaderLength $ServerURL $ServerPort $algName $algVersion);
21
21
 
22
- #$Database_Dir = "$FindBin::Bin/../lib/HeaderParse/database";
23
- #$Data_Dir = "$FindBin::Bin/../lib/HeaderParse/data/";
24
- #$offlineD = "$FindBin::Bin/../lib/HeaderParse/OfflineFiles/";
25
-
26
- $HeaderParseHome = "$FindBin::Bin/HeaderParseService";
27
-
28
22
  if ($ENV{'SVM_LIGHT_HOME'}.length) {
29
23
  $Classifier = "$ENV{'SVM_LIGHT_HOME'}/svm_classify"
30
24
  }
@@ -32,10 +26,17 @@ else {
32
26
  $Classifier = "svm_classify5"; # assume on path
33
27
  }
34
28
 
35
- $Database_Dir = "$HeaderParseHome/resources/database/";
36
- $Data_Dir = "$HeaderParseHome/resources/data/";
37
- $offlineD = "$HeaderParseHome/resources/models/";
38
- $Tmp_Dir = "$HeaderParseHome/tmp";
29
+ my $path;
30
+ BEGIN
31
+ {
32
+ if ($FindBin::Bin =~ /(.*)/) { $path = $1; }
33
+ }
34
+
35
+ $Resource_Dir = "$path/../resources/headerParse";
36
+ $Database_Dir = "$Resource_Dir/database/";
37
+ $Data_Dir = "$Resource_Dir/data/";
38
+ $offlineD = "$Resource_Dir/models/";
39
+ $Tmp_Dir = "$path/../tmp";
39
40
 
40
41
  $nMinHeaderLength = 50;
41
42
  $nMaxHeaderLength = 2500;