biblicit 2.1.0 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/biblicit.gemspec +0 -1
- data/lib/biblicit/extractor.rb +2 -7
- data/lib/biblicit/parscit.rb +18 -6
- data/lib/biblicit/version.rb +1 -1
- data/parscit/bin/citeExtract.pl +16 -4
- data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/AssembleXMLMetadata.pm +0 -0
- data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/Function.pm +0 -0
- data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/LoadInformation.pm +0 -0
- data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/MultiClassChunking.pm +0 -0
- data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/NamePatternMatch.pm +0 -0
- data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/Parser.pm +21 -0
- data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/ParserMethods.pm +0 -0
- data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/Config/API_Config.pm +11 -10
- data/{svm-header-parse/HeaderParseService → parscit/lib/HeaderParse}/README.TXT +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/50states +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/AddrTopWords.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/AffiTopWords.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/AffiTopWordsAll.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/ChineseSurNames.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/Csurnames.bin +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/Csurnames_spec.bin +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/DomainSuffixes.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/LabeledHeader +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/README +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/TrainMulClassLines +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/TrainMulClassLines1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/abstract.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/abstractTopWords +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/addr.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/affi.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/affis.bin +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/all_namewords_spec.bin +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/allnamewords.bin +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/cities_US.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/cities_world.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/city.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/cityname.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/country_abbr.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/countryname.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/dateTopWords +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/degree.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/email.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/excludeWords.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/female-names +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/firstNames.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/firstnames.bin +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/firstnames_spec.bin +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/intro.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/keyword.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/keywordTopWords +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/male-names +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/middleNames.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/month.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/mul +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/mul.label +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/mul.label.old +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/mul.processed +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/mulAuthor +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/mulClassStat +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/nickname.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/nicknames.bin +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/note.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/page.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/phone.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/postcode.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/pubnum.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/statename.bin +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/statename.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/states_and_abbreviations.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/stopwords +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/stopwords.bin +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/surNames.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/surnames.bin +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/surnames_spec.bin +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/A.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/B.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/C.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/D.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/E.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/F.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/G.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/H.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/I.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/J.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/K.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/L.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/M.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/N.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/O.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/P.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/Q.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/R.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/S.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/T.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/U.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/V.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/W.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/WCSelect.gif +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/X.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/Y.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/Z.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ae.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/am.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ar.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/at.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/au.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/bd.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/be.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/bg.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/bh.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/blueribbon.gif +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/bm.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/bn.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/br.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ca.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ch.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/cl.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/cn.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/co.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/cr.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/cy.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/cz.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/de.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/dean-mainlink.jpg +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/dk.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ec.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ee.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/eg.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/es.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/et.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/faq.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/fi.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/fj.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/fo.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/fr.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/geog.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/gr.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/gu.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/hk.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/hr.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/hu.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/id.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ie.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/il.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/in.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/is.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/it.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/jm.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/jo.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/jp.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/kaplan.gif +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/kr.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/kw.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/lb.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/linkbw2.gif +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/lk.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/lt.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/lu.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/lv.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ma.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/maczynski.gif +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/mirror.tar +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/mk.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/mo.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/mseawdm.gif +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/mt.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/mx.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/my.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ni.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/nl.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/no.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/nz.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/pa.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/pe.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ph.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/pl.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/pointcom.gif +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/pr.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ps.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/pt.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/recognition.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/results.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ro.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ru.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/sd.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/se.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/sg.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/si.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/sk.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/th.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/tr.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/tw.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ua.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/uk.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/univ-full.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/univ.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/uy.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/ve.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/yu.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/za.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/university_list/zm.html +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/url.txt +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/webTopWords +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/words +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/10ContextModelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/10Modelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/11ContextModelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/11Modelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/12ContextModelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/12Modelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/13ContextModelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/13Modelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/14ContextModelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/14Modelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/15ContextModelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/15Modelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/1ContextModelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/1Modelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/2ContextModelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/2Modelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/3ContextModelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/3Modelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/4ContextModelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/4Modelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/5ContextModelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/5Modelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/6ContextModelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/6Modelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/7ContextModelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/7Modelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/8ContextModelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/8Modelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/9ContextModelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/9Modelfold1 +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/NameSpaceModel +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/NameSpaceTrainF +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/WrapperBaseFeaDict +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/WrapperContextFeaDict +0 -0
- data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/models/WrapperSpaceAuthorFeaDict +0 -0
- data/sh/convert_to_text.sh +2 -1
- metadata +267 -282
- data/lib/biblicit/citeseer.rb +0 -42
- data/svm-header-parse/HeaderParseService/lib/CSXUtil/SafeText.pm +0 -140
- data/svm-header-parse/HeaderParseService/tmp/.gitignore +0 -4
- data/svm-header-parse/extract.pl +0 -75
data/biblicit.gemspec
CHANGED
@@ -26,7 +26,6 @@ Gem::Specification.new do |gem|
|
|
26
26
|
gem.add_development_dependency 'rake'
|
27
27
|
gem.add_development_dependency 'rspec'
|
28
28
|
gem.add_development_dependency 'pry'
|
29
|
-
gem.add_development_dependency 'pry-debugger'
|
30
29
|
|
31
30
|
gem.requirements << 'For PDFs, Poppler or XPDF (try "which pdftotext")'
|
32
31
|
gem.requirements << 'For Postscript files, Ghostscript (try "which ps2ascii")'
|
data/lib/biblicit/extractor.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
3
|
require 'biblicit/cb2bib'
|
4
|
-
require 'biblicit/citeseer'
|
5
4
|
require 'biblicit/parscit'
|
6
5
|
|
7
6
|
require 'tempfile'
|
@@ -30,7 +29,7 @@ module Biblicit
|
|
30
29
|
|
31
30
|
def self.extract_from_file(file, opts)
|
32
31
|
file = File.realpath(file)
|
33
|
-
tools = opts.delete(:tools) || [:parshed
|
32
|
+
tools = opts.delete(:tools) || [:parshed]
|
34
33
|
|
35
34
|
result = {}
|
36
35
|
|
@@ -38,11 +37,7 @@ module Biblicit
|
|
38
37
|
`#{SH_DIR}/convert_to_text.sh #{file.shellescape} #{in_txt.path}`
|
39
38
|
|
40
39
|
if tools.include?(:parshed)
|
41
|
-
result.merge!(
|
42
|
-
end
|
43
|
-
|
44
|
-
if tools.include?(:citeseer)
|
45
|
-
result.merge!( citeseer: CiteSeer.extract(in_txt, opts) )
|
40
|
+
result.merge!( ParsCit.extract(in_txt, opts) )
|
46
41
|
end
|
47
42
|
|
48
43
|
if tools.include?(:cb2bib)
|
data/lib/biblicit/parscit.rb
CHANGED
@@ -17,20 +17,32 @@ module ParsCit
|
|
17
17
|
attr_reader :result
|
18
18
|
|
19
19
|
def initialize(in_txt, opts={})
|
20
|
+
mode = (opts.fetch :include_citations, false) ? 'extract_all' : 'extract_header'
|
20
21
|
ENV['CRFPP_HOME'] ||= "#{File.dirname(`which crf_test`)}/../"
|
21
|
-
output = `#{PERL_DIR}/bin/citeExtract.pl -q -m
|
22
|
+
output = `#{PERL_DIR}/bin/citeExtract.pl -q -m #{mode} #{in_txt.path}`
|
22
23
|
@result = parse(Nokogiri::XML output)
|
23
24
|
end
|
24
25
|
|
25
26
|
private
|
26
27
|
|
27
28
|
def parse(xml)
|
28
|
-
|
29
|
+
result = {}
|
30
|
+
|
31
|
+
parshed = xml.css("algorithm[name=ParsHed]")
|
32
|
+
result[:parshed] = {
|
33
|
+
title: parshed.css('title').text.gsub(/\s+/,' ').strip,
|
34
|
+
authors: parshed.css('author').map { |a| a.text.gsub(/\s+/,' ').strip },
|
35
|
+
abstract: parshed.css('abstract').text#,
|
36
|
+
#confidence: parshed.css('title').attr('confidence').value.to_f
|
37
|
+
}
|
29
38
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
39
|
+
svm = xml.css('algorithm[name="SVM HeaderParse"]')
|
40
|
+
result[:citeseer] = {
|
41
|
+
title: svm.css('title').text,
|
42
|
+
authors: svm.css('author > name').map { |n| n.text.strip }.reject(&:blank?).uniq,
|
43
|
+
author_emails: svm.css('author > email').map { |n| n.text.strip }.reject(&:blank?).uniq,
|
44
|
+
abstract: svm.css('abstract').text,
|
45
|
+
valid: svm.css('validHeader').first.try(:text) == '1'
|
34
46
|
}
|
35
47
|
|
36
48
|
citations = xml.css('algorithm[name=ParsCit] > citationList > citation').map do |node|
|
data/lib/biblicit/version.rb
CHANGED
data/parscit/bin/citeExtract.pl
CHANGED
@@ -33,8 +33,8 @@ use Getopt::Std;
|
|
33
33
|
use strict 'vars';
|
34
34
|
use lib $FindBin::Bin . "/../lib";
|
35
35
|
|
36
|
-
use lib "/home/wing.nus/tools/languages/programming/perl-5.10.0/lib/5.10.0";
|
37
|
-
use lib "/home/wing.nus/tools/languages/programming/perl-5.10.0/lib/site_perl/5.10.0";
|
36
|
+
#use lib "/home/wing.nus/tools/languages/programming/perl-5.10.0/lib/5.10.0";
|
37
|
+
#use lib "/home/wing.nus/tools/languages/programming/perl-5.10.0/lib/site_perl/5.10.0";
|
38
38
|
|
39
39
|
# Dependencies
|
40
40
|
use File::Spec;
|
@@ -42,6 +42,9 @@ use File::Basename;
|
|
42
42
|
|
43
43
|
# Local libraries
|
44
44
|
use ParsCit::Controller;
|
45
|
+
|
46
|
+
use HeaderParse::API::Parser;
|
47
|
+
use HeaderParse::Config::API_Config;
|
45
48
|
|
46
49
|
# USER customizable section
|
47
50
|
my $tmpfile .= $0;
|
@@ -58,6 +61,7 @@ my $progname = $1;
|
|
58
61
|
my $PARSCIT = 1;
|
59
62
|
my $PARSHED = 2;
|
60
63
|
my $SECTLABEL = 4; # Thang v100401
|
64
|
+
my $SVM = 8;
|
61
65
|
|
62
66
|
my $default_input_type = "raw";
|
63
67
|
my $output_version = "110505";
|
@@ -228,6 +232,14 @@ if (($mode & $PARSCIT) == $PARSCIT)
|
|
228
232
|
if (scalar(@export_types) != 0) { BiblioScript(\@export_types, $$pc_xml, $out); }
|
229
233
|
}
|
230
234
|
|
235
|
+
# SVM HEADER PARSE
|
236
|
+
if (($mode & $SVM) == $SVM)
|
237
|
+
{
|
238
|
+
my $svm_xml = HeaderParse::API::Parser::extractHeader($text_file);
|
239
|
+
|
240
|
+
$rxml .= $$$svm_xml . "\n";
|
241
|
+
}
|
242
|
+
|
231
243
|
$rxml .= "</algorithms>";
|
232
244
|
|
233
245
|
if (defined $out)
|
@@ -267,7 +279,7 @@ sub ParseMode
|
|
267
279
|
}
|
268
280
|
elsif ($arg eq "extract_header")
|
269
281
|
{
|
270
|
-
return $PARSHED;
|
282
|
+
return ($PARSHED | $SVM);
|
271
283
|
}
|
272
284
|
elsif ($arg eq "extract_citations")
|
273
285
|
{
|
@@ -279,7 +291,7 @@ sub ParseMode
|
|
279
291
|
}
|
280
292
|
elsif ($arg eq "extract_all")
|
281
293
|
{
|
282
|
-
return ($PARSHED | $PARSCIT | $SECTLABEL);
|
294
|
+
return ($PARSHED | $PARSCIT | $SECTLABEL | $SVM);
|
283
295
|
}
|
284
296
|
else
|
285
297
|
{
|
data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/AssembleXMLMetadata.pm
RENAMED
File without changes
|
File without changes
|
File without changes
|
data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/MultiClassChunking.pm
RENAMED
File without changes
|
data/{svm-header-parse/HeaderParseService → parscit}/lib/HeaderParse/API/NamePatternMatch.pm
RENAMED
File without changes
|
@@ -20,6 +20,27 @@ use HeaderParse::Config::API_Config;
|
|
20
20
|
use vars qw($ServerURL $repositoryLocation $algVersion);
|
21
21
|
|
22
22
|
|
23
|
+
sub extractHeader {
|
24
|
+
my ($textFile) = @_;
|
25
|
+
|
26
|
+
my $jobID;
|
27
|
+
while($jobID = rand(time)) {
|
28
|
+
unless(-f $offlineD."$jobID") {
|
29
|
+
last;
|
30
|
+
}
|
31
|
+
}
|
32
|
+
|
33
|
+
my ($status, $msg, $rXML) = _parseHeader($textFile, $jobID);
|
34
|
+
|
35
|
+
if ($status <= 0) {
|
36
|
+
my $error = "Error: $msg";
|
37
|
+
return \$error;
|
38
|
+
}
|
39
|
+
else {
|
40
|
+
return \$rXML;
|
41
|
+
}
|
42
|
+
}
|
43
|
+
|
23
44
|
sub _parseHeader{
|
24
45
|
my ($fileID, $jobID) = @_;
|
25
46
|
my ($header, $faultMessage, $rResponse, $success, $papertext);
|
File without changes
|
@@ -19,12 +19,6 @@ use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
|
|
19
19
|
@ISA = qw(Exporter);
|
20
20
|
@EXPORT = qw($Classifier $offlineD $Database_Dir $Data_Dir $Tmp_Dir $nMinHeaderLength $nMaxHeaderLength $ServerURL $ServerPort $algName $algVersion);
|
21
21
|
|
22
|
-
#$Database_Dir = "$FindBin::Bin/../lib/HeaderParse/database";
|
23
|
-
#$Data_Dir = "$FindBin::Bin/../lib/HeaderParse/data/";
|
24
|
-
#$offlineD = "$FindBin::Bin/../lib/HeaderParse/OfflineFiles/";
|
25
|
-
|
26
|
-
$HeaderParseHome = "$FindBin::Bin/HeaderParseService";
|
27
|
-
|
28
22
|
if ($ENV{'SVM_LIGHT_HOME'}.length) {
|
29
23
|
$Classifier = "$ENV{'SVM_LIGHT_HOME'}/svm_classify"
|
30
24
|
}
|
@@ -32,10 +26,17 @@ else {
|
|
32
26
|
$Classifier = "svm_classify5"; # assume on path
|
33
27
|
}
|
34
28
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
$
|
29
|
+
my $path;
|
30
|
+
BEGIN
|
31
|
+
{
|
32
|
+
if ($FindBin::Bin =~ /(.*)/) { $path = $1; }
|
33
|
+
}
|
34
|
+
|
35
|
+
$Resource_Dir = "$path/../resources/headerParse";
|
36
|
+
$Database_Dir = "$Resource_Dir/database/";
|
37
|
+
$Data_Dir = "$Resource_Dir/data/";
|
38
|
+
$offlineD = "$Resource_Dir/models/";
|
39
|
+
$Tmp_Dir = "$path/../tmp";
|
39
40
|
|
40
41
|
$nMinHeaderLength = 50;
|
41
42
|
$nMaxHeaderLength = 2500;
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/README
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/{svm-header-parse/HeaderParseService/resources → parscit/resources/headerParse}/database/mul
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|