biblicit 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/.rspec +1 -0
- data/Gemfile +6 -0
- data/LICENSE.TXT +176 -0
- data/README.md +120 -0
- data/Rakefile +8 -0
- data/biblicit.gemspec +33 -0
- data/lib/biblicit/cb2bib.rb +83 -0
- data/lib/biblicit/citeseer.rb +53 -0
- data/lib/biblicit/extractor.rb +37 -0
- data/lib/biblicit.rb +6 -0
- data/perl/DocFilter/lib/CSXUtil/SafeText.pm +140 -0
- data/perl/DocFilter/lib/DocFilter/Config.pm +35 -0
- data/perl/DocFilter/lib/DocFilter/Filter.pm +51 -0
- data/perl/FileConversionService/README.TXT +11 -0
- data/perl/FileConversionService/converters/PDFBox/pdfbox-app-1.7.1.jar +0 -0
- data/perl/FileConversionService/lib/CSXUtil/SafeText.pm +140 -0
- data/perl/FileConversionService/lib/FileConverter/CheckSum.pm +77 -0
- data/perl/FileConversionService/lib/FileConverter/Compression.pm +137 -0
- data/perl/FileConversionService/lib/FileConverter/Config.pm +57 -0
- data/perl/FileConversionService/lib/FileConverter/Controller.pm +191 -0
- data/perl/FileConversionService/lib/FileConverter/JODConverter.pm +61 -0
- data/perl/FileConversionService/lib/FileConverter/PDFBox.pm +69 -0
- data/perl/FileConversionService/lib/FileConverter/PSConverter.pm +69 -0
- data/perl/FileConversionService/lib/FileConverter/PSToText.pm +88 -0
- data/perl/FileConversionService/lib/FileConverter/Prescript.pm +68 -0
- data/perl/FileConversionService/lib/FileConverter/TET.pm +75 -0
- data/perl/FileConversionService/lib/FileConverter/Utils.pm +130 -0
- data/perl/HeaderParseService/README.TXT +80 -0
- data/perl/HeaderParseService/lib/CSXUtil/SafeText.pm +140 -0
- data/perl/HeaderParseService/lib/HeaderParse/API/AssembleXMLMetadata.pm +968 -0
- data/perl/HeaderParseService/lib/HeaderParse/API/Function.pm +2016 -0
- data/perl/HeaderParseService/lib/HeaderParse/API/LoadInformation.pm +444 -0
- data/perl/HeaderParseService/lib/HeaderParse/API/MultiClassChunking.pm +409 -0
- data/perl/HeaderParseService/lib/HeaderParse/API/NamePatternMatch.pm +537 -0
- data/perl/HeaderParseService/lib/HeaderParse/API/Parser.pm +68 -0
- data/perl/HeaderParseService/lib/HeaderParse/API/ParserMethods.pm +1880 -0
- data/perl/HeaderParseService/lib/HeaderParse/Config/API_Config.pm +46 -0
- data/perl/HeaderParseService/resources/data/EbizHeaders.txt +24330 -0
- data/perl/HeaderParseService/resources/data/EbizHeaders.txt.parsed +27506 -0
- data/perl/HeaderParseService/resources/data/EbizHeaders.txt.parsed.old +26495 -0
- data/perl/HeaderParseService/resources/data/tagged_headers.txt +40668 -0
- data/perl/HeaderParseService/resources/data/test_header.txt +31 -0
- data/perl/HeaderParseService/resources/data/test_header.txt.parsed +31 -0
- data/perl/HeaderParseService/resources/database/50states +60 -0
- data/perl/HeaderParseService/resources/database/AddrTopWords.txt +17 -0
- data/perl/HeaderParseService/resources/database/AffiTopWords.txt +35 -0
- data/perl/HeaderParseService/resources/database/AffiTopWordsAll.txt +533 -0
- data/perl/HeaderParseService/resources/database/ChineseSurNames.txt +276 -0
- data/perl/HeaderParseService/resources/database/Csurnames.bin +0 -0
- data/perl/HeaderParseService/resources/database/Csurnames_spec.bin +0 -0
- data/perl/HeaderParseService/resources/database/DomainSuffixes.txt +242 -0
- data/perl/HeaderParseService/resources/database/LabeledHeader +18 -0
- data/perl/HeaderParseService/resources/database/README +2 -0
- data/perl/HeaderParseService/resources/database/TrainMulClassLines +254 -0
- data/perl/HeaderParseService/resources/database/TrainMulClassLines1 +510 -0
- data/perl/HeaderParseService/resources/database/abstract.txt +1 -0
- data/perl/HeaderParseService/resources/database/abstractTopWords +9 -0
- data/perl/HeaderParseService/resources/database/addr.txt +28 -0
- data/perl/HeaderParseService/resources/database/affi.txt +34 -0
- data/perl/HeaderParseService/resources/database/affis.bin +0 -0
- data/perl/HeaderParseService/resources/database/all_namewords_spec.bin +0 -0
- data/perl/HeaderParseService/resources/database/allnamewords.bin +0 -0
- data/perl/HeaderParseService/resources/database/cities_US.txt +4512 -0
- data/perl/HeaderParseService/resources/database/cities_world.txt +4463 -0
- data/perl/HeaderParseService/resources/database/city.txt +3150 -0
- data/perl/HeaderParseService/resources/database/cityname.txt +3151 -0
- data/perl/HeaderParseService/resources/database/country_abbr.txt +243 -0
- data/perl/HeaderParseService/resources/database/countryname.txt +262 -0
- data/perl/HeaderParseService/resources/database/dateTopWords +30 -0
- data/perl/HeaderParseService/resources/database/degree.txt +67 -0
- data/perl/HeaderParseService/resources/database/email.txt +3 -0
- data/perl/HeaderParseService/resources/database/excludeWords.txt +40 -0
- data/perl/HeaderParseService/resources/database/female-names +4960 -0
- data/perl/HeaderParseService/resources/database/firstNames.txt +8448 -0
- data/perl/HeaderParseService/resources/database/firstnames.bin +0 -0
- data/perl/HeaderParseService/resources/database/firstnames_spec.bin +0 -0
- data/perl/HeaderParseService/resources/database/intro.txt +2 -0
- data/perl/HeaderParseService/resources/database/keyword.txt +5 -0
- data/perl/HeaderParseService/resources/database/keywordTopWords +7 -0
- data/perl/HeaderParseService/resources/database/male-names +3906 -0
- data/perl/HeaderParseService/resources/database/middleNames.txt +2 -0
- data/perl/HeaderParseService/resources/database/month.txt +35 -0
- data/perl/HeaderParseService/resources/database/mul +868 -0
- data/perl/HeaderParseService/resources/database/mul.label +869 -0
- data/perl/HeaderParseService/resources/database/mul.label.old +869 -0
- data/perl/HeaderParseService/resources/database/mul.processed +762 -0
- data/perl/HeaderParseService/resources/database/mulAuthor +619 -0
- data/perl/HeaderParseService/resources/database/mulClassStat +45 -0
- data/perl/HeaderParseService/resources/database/nickname.txt +58 -0
- data/perl/HeaderParseService/resources/database/nicknames.bin +0 -0
- data/perl/HeaderParseService/resources/database/note.txt +121 -0
- data/perl/HeaderParseService/resources/database/page.txt +1 -0
- data/perl/HeaderParseService/resources/database/phone.txt +9 -0
- data/perl/HeaderParseService/resources/database/postcode.txt +54 -0
- data/perl/HeaderParseService/resources/database/pubnum.txt +45 -0
- data/perl/HeaderParseService/resources/database/statename.bin +0 -0
- data/perl/HeaderParseService/resources/database/statename.txt +73 -0
- data/perl/HeaderParseService/resources/database/states_and_abbreviations.txt +118 -0
- data/perl/HeaderParseService/resources/database/stopwords +438 -0
- data/perl/HeaderParseService/resources/database/stopwords.bin +0 -0
- data/perl/HeaderParseService/resources/database/surNames.txt +19613 -0
- data/perl/HeaderParseService/resources/database/surnames.bin +0 -0
- data/perl/HeaderParseService/resources/database/surnames_spec.bin +0 -0
- data/perl/HeaderParseService/resources/database/university_list/A.html +167 -0
- data/perl/HeaderParseService/resources/database/university_list/B.html +161 -0
- data/perl/HeaderParseService/resources/database/university_list/C.html +288 -0
- data/perl/HeaderParseService/resources/database/university_list/D.html +115 -0
- data/perl/HeaderParseService/resources/database/university_list/E.html +147 -0
- data/perl/HeaderParseService/resources/database/university_list/F.html +112 -0
- data/perl/HeaderParseService/resources/database/university_list/G.html +115 -0
- data/perl/HeaderParseService/resources/database/university_list/H.html +140 -0
- data/perl/HeaderParseService/resources/database/university_list/I.html +138 -0
- data/perl/HeaderParseService/resources/database/university_list/J.html +82 -0
- data/perl/HeaderParseService/resources/database/university_list/K.html +115 -0
- data/perl/HeaderParseService/resources/database/university_list/L.html +131 -0
- data/perl/HeaderParseService/resources/database/university_list/M.html +201 -0
- data/perl/HeaderParseService/resources/database/university_list/N.html +204 -0
- data/perl/HeaderParseService/resources/database/university_list/O.html +89 -0
- data/perl/HeaderParseService/resources/database/university_list/P.html +125 -0
- data/perl/HeaderParseService/resources/database/university_list/Q.html +49 -0
- data/perl/HeaderParseService/resources/database/university_list/R.html +126 -0
- data/perl/HeaderParseService/resources/database/university_list/S.html +296 -0
- data/perl/HeaderParseService/resources/database/university_list/T.html +156 -0
- data/perl/HeaderParseService/resources/database/university_list/U.html +800 -0
- data/perl/HeaderParseService/resources/database/university_list/V.html +75 -0
- data/perl/HeaderParseService/resources/database/university_list/W.html +144 -0
- data/perl/HeaderParseService/resources/database/university_list/WCSelect.gif +0 -0
- data/perl/HeaderParseService/resources/database/university_list/X.html +44 -0
- data/perl/HeaderParseService/resources/database/university_list/Y.html +53 -0
- data/perl/HeaderParseService/resources/database/university_list/Z.html +43 -0
- data/perl/HeaderParseService/resources/database/university_list/ae.html +31 -0
- data/perl/HeaderParseService/resources/database/university_list/am.html +30 -0
- data/perl/HeaderParseService/resources/database/university_list/ar.html +35 -0
- data/perl/HeaderParseService/resources/database/university_list/at.html +43 -0
- data/perl/HeaderParseService/resources/database/university_list/au.html +82 -0
- data/perl/HeaderParseService/resources/database/university_list/bd.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/be.html +41 -0
- data/perl/HeaderParseService/resources/database/university_list/bg.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/bh.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/blueribbon.gif +0 -0
- data/perl/HeaderParseService/resources/database/university_list/bm.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/bn.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/br.html +66 -0
- data/perl/HeaderParseService/resources/database/university_list/ca.html +174 -0
- data/perl/HeaderParseService/resources/database/university_list/ch.html +52 -0
- data/perl/HeaderParseService/resources/database/university_list/cl.html +40 -0
- data/perl/HeaderParseService/resources/database/university_list/cn.html +87 -0
- data/perl/HeaderParseService/resources/database/university_list/co.html +39 -0
- data/perl/HeaderParseService/resources/database/university_list/cr.html +34 -0
- data/perl/HeaderParseService/resources/database/university_list/cy.html +34 -0
- data/perl/HeaderParseService/resources/database/university_list/cz.html +44 -0
- data/perl/HeaderParseService/resources/database/university_list/de.html +128 -0
- data/perl/HeaderParseService/resources/database/university_list/dean-mainlink.jpg +0 -0
- data/perl/HeaderParseService/resources/database/university_list/dk.html +42 -0
- data/perl/HeaderParseService/resources/database/university_list/ec.html +31 -0
- data/perl/HeaderParseService/resources/database/university_list/ee.html +30 -0
- data/perl/HeaderParseService/resources/database/university_list/eg.html +29 -0
- data/perl/HeaderParseService/resources/database/university_list/es.html +68 -0
- data/perl/HeaderParseService/resources/database/university_list/et.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/faq.html +147 -0
- data/perl/HeaderParseService/resources/database/university_list/fi.html +49 -0
- data/perl/HeaderParseService/resources/database/university_list/fj.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/fo.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/fr.html +106 -0
- data/perl/HeaderParseService/resources/database/university_list/geog.html +150 -0
- data/perl/HeaderParseService/resources/database/university_list/gr.html +38 -0
- data/perl/HeaderParseService/resources/database/university_list/gu.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/hk.html +34 -0
- data/perl/HeaderParseService/resources/database/university_list/hr.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/hu.html +46 -0
- data/perl/HeaderParseService/resources/database/university_list/id.html +29 -0
- data/perl/HeaderParseService/resources/database/university_list/ie.html +49 -0
- data/perl/HeaderParseService/resources/database/university_list/il.html +35 -0
- data/perl/HeaderParseService/resources/database/university_list/in.html +109 -0
- data/perl/HeaderParseService/resources/database/university_list/is.html +32 -0
- data/perl/HeaderParseService/resources/database/university_list/it.html +75 -0
- data/perl/HeaderParseService/resources/database/university_list/jm.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/jo.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/jp.html +155 -0
- data/perl/HeaderParseService/resources/database/university_list/kaplan.gif +0 -0
- data/perl/HeaderParseService/resources/database/university_list/kr.html +65 -0
- data/perl/HeaderParseService/resources/database/university_list/kw.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/lb.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/linkbw2.gif +0 -0
- data/perl/HeaderParseService/resources/database/university_list/lk.html +30 -0
- data/perl/HeaderParseService/resources/database/university_list/lt.html +31 -0
- data/perl/HeaderParseService/resources/database/university_list/lu.html +34 -0
- data/perl/HeaderParseService/resources/database/university_list/lv.html +30 -0
- data/perl/HeaderParseService/resources/database/university_list/ma.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/maczynski.gif +0 -0
- data/perl/HeaderParseService/resources/database/university_list/mirror.tar +0 -0
- data/perl/HeaderParseService/resources/database/university_list/mk.html +29 -0
- data/perl/HeaderParseService/resources/database/university_list/mo.html +29 -0
- data/perl/HeaderParseService/resources/database/university_list/mseawdm.gif +0 -0
- data/perl/HeaderParseService/resources/database/university_list/mt.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/mx.html +68 -0
- data/perl/HeaderParseService/resources/database/university_list/my.html +39 -0
- data/perl/HeaderParseService/resources/database/university_list/ni.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/nl.html +51 -0
- data/perl/HeaderParseService/resources/database/university_list/no.html +56 -0
- data/perl/HeaderParseService/resources/database/university_list/nz.html +41 -0
- data/perl/HeaderParseService/resources/database/university_list/pa.html +31 -0
- data/perl/HeaderParseService/resources/database/university_list/pe.html +40 -0
- data/perl/HeaderParseService/resources/database/university_list/ph.html +41 -0
- data/perl/HeaderParseService/resources/database/university_list/pl.html +51 -0
- data/perl/HeaderParseService/resources/database/university_list/pointcom.gif +0 -0
- data/perl/HeaderParseService/resources/database/university_list/pr.html +31 -0
- data/perl/HeaderParseService/resources/database/university_list/ps.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/pt.html +45 -0
- data/perl/HeaderParseService/resources/database/university_list/recognition.html +69 -0
- data/perl/HeaderParseService/resources/database/university_list/results.html +71 -0
- data/perl/HeaderParseService/resources/database/university_list/ro.html +38 -0
- data/perl/HeaderParseService/resources/database/university_list/ru.html +48 -0
- data/perl/HeaderParseService/resources/database/university_list/sd.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/se.html +57 -0
- data/perl/HeaderParseService/resources/database/university_list/sg.html +33 -0
- data/perl/HeaderParseService/resources/database/university_list/si.html +30 -0
- data/perl/HeaderParseService/resources/database/university_list/sk.html +35 -0
- data/perl/HeaderParseService/resources/database/university_list/th.html +45 -0
- data/perl/HeaderParseService/resources/database/university_list/tr.html +44 -0
- data/perl/HeaderParseService/resources/database/university_list/tw.html +76 -0
- data/perl/HeaderParseService/resources/database/university_list/ua.html +29 -0
- data/perl/HeaderParseService/resources/database/university_list/uk.html +168 -0
- data/perl/HeaderParseService/resources/database/university_list/univ-full.html +3166 -0
- data/perl/HeaderParseService/resources/database/university_list/univ.html +122 -0
- data/perl/HeaderParseService/resources/database/university_list/uy.html +31 -0
- data/perl/HeaderParseService/resources/database/university_list/ve.html +34 -0
- data/perl/HeaderParseService/resources/database/university_list/yu.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list/za.html +46 -0
- data/perl/HeaderParseService/resources/database/university_list/zm.html +28 -0
- data/perl/HeaderParseService/resources/database/university_list.txt +3025 -0
- data/perl/HeaderParseService/resources/database/url.txt +1 -0
- data/perl/HeaderParseService/resources/database/webTopWords +225 -0
- data/perl/HeaderParseService/resources/database/words +45402 -0
- data/perl/HeaderParseService/resources/models/10ContextModelfold1 +369 -0
- data/perl/HeaderParseService/resources/models/10Modelfold1 +376 -0
- data/perl/HeaderParseService/resources/models/11ContextModelfold1 +400 -0
- data/perl/HeaderParseService/resources/models/11Modelfold1 +526 -0
- data/perl/HeaderParseService/resources/models/12ContextModelfold1 +510 -0
- data/perl/HeaderParseService/resources/models/12Modelfold1 +423 -0
- data/perl/HeaderParseService/resources/models/13ContextModelfold1 +364 -0
- data/perl/HeaderParseService/resources/models/13Modelfold1 +677 -0
- data/perl/HeaderParseService/resources/models/14ContextModelfold1 +459 -0
- data/perl/HeaderParseService/resources/models/14Modelfold1 +325 -0
- data/perl/HeaderParseService/resources/models/15ContextModelfold1 +340 -0
- data/perl/HeaderParseService/resources/models/15Modelfold1 +390 -0
- data/perl/HeaderParseService/resources/models/1ContextModelfold1 +668 -0
- data/perl/HeaderParseService/resources/models/1Modelfold1 +1147 -0
- data/perl/HeaderParseService/resources/models/2ContextModelfold1 +755 -0
- data/perl/HeaderParseService/resources/models/2Modelfold1 +796 -0
- data/perl/HeaderParseService/resources/models/3ContextModelfold1 +1299 -0
- data/perl/HeaderParseService/resources/models/3Modelfold1 +1360 -0
- data/perl/HeaderParseService/resources/models/4ContextModelfold1 +1062 -0
- data/perl/HeaderParseService/resources/models/4Modelfold1 +993 -0
- data/perl/HeaderParseService/resources/models/5ContextModelfold1 +1339 -0
- data/perl/HeaderParseService/resources/models/5Modelfold1 +2098 -0
- data/perl/HeaderParseService/resources/models/6ContextModelfold1 +888 -0
- data/perl/HeaderParseService/resources/models/6Modelfold1 +620 -0
- data/perl/HeaderParseService/resources/models/7ContextModelfold1 +257 -0
- data/perl/HeaderParseService/resources/models/7Modelfold1 +228 -0
- data/perl/HeaderParseService/resources/models/8ContextModelfold1 +677 -0
- data/perl/HeaderParseService/resources/models/8Modelfold1 +1871 -0
- data/perl/HeaderParseService/resources/models/9ContextModelfold1 +198 -0
- data/perl/HeaderParseService/resources/models/9Modelfold1 +170 -0
- data/perl/HeaderParseService/resources/models/NameSpaceModel +181 -0
- data/perl/HeaderParseService/resources/models/NameSpaceTrainF +347 -0
- data/perl/HeaderParseService/resources/models/WrapperBaseFeaDict +13460 -0
- data/perl/HeaderParseService/resources/models/WrapperContextFeaDict +14045 -0
- data/perl/HeaderParseService/resources/models/WrapperSpaceAuthorFeaDict +510 -0
- data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test1 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test10 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test11 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test12 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test13 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test14 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test15 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test2 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test3 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test4 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test5 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test6 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test7 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test8 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test9 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test1 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test10 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test11 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test12 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test13 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test14 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test15 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test2 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test3 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test4 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test5 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test6 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test7 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test8 +23 -0
- data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test9 +23 -0
- data/perl/ParsCit/README.TXT +82 -0
- data/perl/ParsCit/crfpp/traindata/parsCit.template +60 -0
- data/perl/ParsCit/crfpp/traindata/parsCit.train.data +12104 -0
- data/perl/ParsCit/crfpp/traindata/tagged_references.txt +500 -0
- data/perl/ParsCit/lib/CSXUtil/SafeText.pm +140 -0
- data/perl/ParsCit/lib/ParsCit/Citation.pm +462 -0
- data/perl/ParsCit/lib/ParsCit/CitationContext.pm +132 -0
- data/perl/ParsCit/lib/ParsCit/Config.pm +46 -0
- data/perl/ParsCit/lib/ParsCit/Controller.pm +306 -0
- data/perl/ParsCit/lib/ParsCit/PostProcess.pm +367 -0
- data/perl/ParsCit/lib/ParsCit/PreProcess.pm +333 -0
- data/perl/ParsCit/lib/ParsCit/Tr2crfpp.pm +331 -0
- data/perl/ParsCit/resources/parsCit.model +0 -0
- data/perl/ParsCit/resources/parsCitDict.txt +148783 -0
- data/perl/extract.pl +199 -0
- data/spec/biblicit/cb2bib_spec.rb +48 -0
- data/spec/biblicit/citeseer_spec.rb +40 -0
- data/spec/fixtures/pdf/10.1.1.109.4049.pdf +0 -0
- data/spec/fixtures/pdf/Bagnoli Watts TAR 2010.pdf +0 -0
- data/spec/fixtures/pdf/ICINCO_2010.pdf +0 -0
- data/spec/spec_helper.rb +3 -0
- metadata +474 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
<HTML>
|
|
2
|
+
<HEAD><TITLE>College and University Home Pages</TITLE>
|
|
3
|
+
<LINK HREF="mailto:cdemello@mit.edu">
|
|
4
|
+
</HEAD>
|
|
5
|
+
<BODY>
|
|
6
|
+
|
|
7
|
+
<H1><A HREF="http://www.eff.org/blueribbon.html"><IMG ALIGN="TOP" ALT="[Blue
|
|
8
|
+
Ribbon]" SRC="blueribbon.gif"></A> College and University Home Pages -
|
|
9
|
+
Alphabetical Listing</H1>
|
|
10
|
+
|
|
11
|
+
<HR>
|
|
12
|
+
(C)Copyright 1995,1996 <A HREF="http://www.mit.edu:8001/people/cdemello/home.html">Christina DeMello</A>. Reproduction and distribution are
|
|
13
|
+
permissible for non-profit purposes only, but no changes are to be
|
|
14
|
+
made to these documents without the author's written consent. <P>
|
|
15
|
+
|
|
16
|
+
<STRONG>July, 1996</STRONG>. This list now has over
|
|
17
|
+
<STRONG>3000</STRONG> entries! Several other sites have given special
|
|
18
|
+
attention to this site, so they are now listed on a <A
|
|
19
|
+
HREF="recognition.html">recognition</A> page. <P>
|
|
20
|
+
|
|
21
|
+
In response to the increasing amount of email I have been getting, I've
|
|
22
|
+
compiled a list of <A HREF="faq.html">frequently asked
|
|
23
|
+
questions</A>. <EM>Please read these answers before sending me email!</EM>
|
|
24
|
+
<P>
|
|
25
|
+
|
|
26
|
+
<STRONG>SURVEY:</STRONG> I'm
|
|
27
|
+
no longer able to keep up with the survey on 'good school homepages', but
|
|
28
|
+
you can see the final <A HREF="results.html">results.</A> Look for an
|
|
29
|
+
updated survey within the next couple of months.<P>
|
|
30
|
+
|
|
31
|
+
For faster access, you may also want to try the following mirror sites:
|
|
32
|
+
|
|
33
|
+
<UL>
|
|
34
|
+
|
|
35
|
+
<LI><A HREF="http://www.mit.edu:8001/people/cdemello/univ.html">Boston, MA (Original Site - always has latest copy)</A>
|
|
36
|
+
<LI><A HREF="http://www.indiana.edu/~librcsd/resource/univ/univ.html">Indiana
|
|
37
|
+
University Libraries, Research Collections and Services Department</A>
|
|
38
|
+
<LI><A HREF="http://www.shu.edu/docs/world/schools/univ.html">Seton Hall University, South Orange, NJ</A>
|
|
39
|
+
<LI><A
|
|
40
|
+
HREF="http://www.mbark.swin.edu.au/mbark/pages/othersites/unis/cdemello/">Swinburne University, Australia</A>
|
|
41
|
+
<LI> <A
|
|
42
|
+
HREF="http://www.rirr.cnuce.cnr.it/universities/univ.html">CNUCE
|
|
43
|
+
Institute Pisa, Italy</A>
|
|
44
|
+
<LI> <A HREF="http://www-mcb.ucdavis.edu/info/CandU/univ.html">UC Davis,
|
|
45
|
+
Davis, CA</A>
|
|
46
|
+
<LI> <A HREF="http://geowww.uibk.ac.at/links/uni-search.html">University of
|
|
47
|
+
Innsbruck, Austria</A> (Searchable Index)
|
|
48
|
+
<LI> <A HREF="http://www.vol.it/mirror/universita/">Video On Line, Italy</A>
|
|
49
|
+
|
|
50
|
+
<LI> <A HREF="http://persia.nic.ir/~elibrary/Univ/univ.html">Institute for
|
|
51
|
+
Theoretical Studies in Physics and Mathematics, Iran</A>
|
|
52
|
+
</UL>
|
|
53
|
+
|
|
54
|
+
<HR>
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
All schools are currently arranged alphabetically. A <A
|
|
58
|
+
HREF="geog.html">geographical listing</A> is available as well. To
|
|
59
|
+
view colleges under a particular letter, select it below:
|
|
60
|
+
|
|
61
|
+
<P>
|
|
62
|
+
|
|
63
|
+
<A HREF="A.html">A</A>, <A HREF="B.html">B</A>, <A
|
|
64
|
+
HREF="C.html">C</A>, <A HREF="D.html">D</A>, <A HREF="E.html">E</A>,
|
|
65
|
+
<A HREF="F.html">F</A>, <A HREF="G.html">G</A>, <A
|
|
66
|
+
HREF="H.html">H</A>, <A HREF="I.html">I</A>, <A HREF="J.html">J</A>,
|
|
67
|
+
<A HREF="K.html">K</A>, <A HREF="L.html">L</A>, <A
|
|
68
|
+
HREF="M.html">M</A>, <A HREF="N.html">N</A>, <A HREF="O.html">O</A>,
|
|
69
|
+
<A HREF="P.html">P</A>, <A HREF="Q.html">Q</A>, <A
|
|
70
|
+
HREF="R.html">R</A>, <A HREF="S.html">S</A>, <A HREF="T.html">T</A>,
|
|
71
|
+
<A HREF="U.html">U</A>, <A HREF="V.html">V</A>, <A
|
|
72
|
+
HREF="W.html">W</A>, <A HREF="X.html">X</A>, <A
|
|
73
|
+
HREF="Y.html">Y</A>,<A HREF="Z.html">Z</A>
|
|
74
|
+
|
|
75
|
+
<P>
|
|
76
|
+
|
|
77
|
+
You can also access:
|
|
78
|
+
<UL>
|
|
79
|
+
<LI> <A HREF="univ-full.html">Full list of Universities</A>
|
|
80
|
+
|
|
81
|
+
<LI> <A HREF="geog.html">Geographical Listings of Universities</A>
|
|
82
|
+
|
|
83
|
+
<LI> <A HREF="faq.html">Frequently Asked Questions and their Answers</A>
|
|
84
|
+
|
|
85
|
+
<LI> <A HREF="results.html">Survey Results</A>
|
|
86
|
+
|
|
87
|
+
<LI> <A HREF="mirror.tar.Z">Compressed Tar File of All Files</A>
|
|
88
|
+
|
|
89
|
+
<LI> <A HREF="mirror.tar.gz">Gzipped Tar File of All Files</A>
|
|
90
|
+
</UL>
|
|
91
|
+
|
|
92
|
+
<HR>
|
|
93
|
+
|
|
94
|
+
These lists contain home pages (please do not send non-http URLs) for
|
|
95
|
+
universities and colleges all over the world. They do not contain
|
|
96
|
+
departmental pages unless the page in question did an adequate job of
|
|
97
|
+
providing general campus information as well, and the lists only include one
|
|
98
|
+
link per school. You can get more information about these lists in the new
|
|
99
|
+
<A HREF="faq.html">FAQ</A>. For a list of US Schools only, see Mike
|
|
100
|
+
Conlon's <A
|
|
101
|
+
HREF="http://www.clas.ufl.edu/CLAS/american-universities.html">list of
|
|
102
|
+
American Universities</A>. <P>
|
|
103
|
+
|
|
104
|
+
You can now send updates to the lists via <A
|
|
105
|
+
HREF="http://www.mit.edu:8001/people/cdemello/new-school.html">a
|
|
106
|
+
form</A>. If you do not have forms support, please send additions,
|
|
107
|
+
corrections, and comments to <EM><A
|
|
108
|
+
HREF="mailto:cdemello@mit.edu">cdemello@mit.edu</A></EM>. Please allow some
|
|
109
|
+
time for a response as I no longer have a direct net connection at work
|
|
110
|
+
every day. Also, this list may be updated more quickly than I can respond
|
|
111
|
+
personally to email.<P>
|
|
112
|
+
|
|
113
|
+
<HR>
|
|
114
|
+
<EM>cdemello@mit.edu, cdemello@us.oracle.com</EM>
|
|
115
|
+
|
|
116
|
+
<h5>Last modified: Mon Jul 15 00:14:26 1996</h5>
|
|
117
|
+
</BODY>
|
|
118
|
+
</HTML>
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
<HTML><HEAD><TITLE>Colleges and Universities - Uruguay </TITLE>
|
|
2
|
+
<LINK HREF="mailto:cdemello@mit.edu">
|
|
3
|
+
</HEAD><BODY><H1><A HREF="http://www.eff.org/blueribbon.html"><IMG ALIGN="TOP" ALT="[Blue Ribbon]" SRC="blueribbon.gif"></A>Colleges and Universities - Uruguay </H1>
|
|
4
|
+
<HR>
|
|
5
|
+
|
|
6
|
+
(C)Copyright 1995,1996 Christina DeMello. Reproduction and distribution are
|
|
7
|
+
permissible for non-profit purposes only, but no changes are to be
|
|
8
|
+
made to this document without the author's written consent. <P>
|
|
9
|
+
|
|
10
|
+
Please send additions, corrections, and comments to <EM><A
|
|
11
|
+
HREF="mailto:cdemello@mit.edu">cdemello@mit.edu</A></EM>. Please
|
|
12
|
+
give me a few days to respond as I no longer have a
|
|
13
|
+
direct net connection at work every day. Also, this list may be
|
|
14
|
+
updated more quickly than I can respond personally to email.<P> <HR> <OL>
|
|
15
|
+
|
|
16
|
+
<h5>Last modified: Thu Jul 11 20:53:17 1996</h5>
|
|
17
|
+
|
|
18
|
+
<LI> <A HREF="http://www.ort.edu.uy/">ORT Uruguay</A>
|
|
19
|
+
<LI> <A HREF="http://www.rau.edu.uy/universidad">Universidad de la Republica</A>
|
|
20
|
+
<LI> <A HREF="http://fisica.edu.uy/">Universidad de la Republica Oriental del Uruguay</A>
|
|
21
|
+
<LI> <A HREF="http://www.chasque.apc.org/pasecco/maryland/">University of Maryland, Montevideo Uruguay</A>
|
|
22
|
+
</OL>
|
|
23
|
+
<A HREF="geog.html">Return to Top Level</A>
|
|
24
|
+
<HR>
|
|
25
|
+
<EM><A HREF="mailto:cdemello@mit.edu">cdemello@mit.edu, cdemello@us.oracle.com</A></EM>
|
|
26
|
+
<P>
|
|
27
|
+
Last updated:
|
|
28
|
+
|
|
29
|
+
Mon Jul 15 00:42:41 PDT 1996
|
|
30
|
+
</BODY>
|
|
31
|
+
</HTML>
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
<HTML><HEAD><TITLE>Colleges and Universities - Venezuela </TITLE>
|
|
2
|
+
<LINK HREF="mailto:cdemello@mit.edu">
|
|
3
|
+
</HEAD><BODY><H1><A HREF="http://www.eff.org/blueribbon.html"><IMG ALIGN="TOP" ALT="[Blue Ribbon]" SRC="blueribbon.gif"></A>Colleges and Universities - Venezuela </H1>
|
|
4
|
+
<HR>
|
|
5
|
+
|
|
6
|
+
(C)Copyright 1995,1996 Christina DeMello. Reproduction and distribution are
|
|
7
|
+
permissible for non-profit purposes only, but no changes are to be
|
|
8
|
+
made to this document without the author's written consent. <P>
|
|
9
|
+
|
|
10
|
+
Please send additions, corrections, and comments to <EM><A
|
|
11
|
+
HREF="mailto:cdemello@mit.edu">cdemello@mit.edu</A></EM>. Please
|
|
12
|
+
give me a few days to respond as I no longer have a
|
|
13
|
+
direct net connection at work every day. Also, this list may be
|
|
14
|
+
updated more quickly than I can respond personally to email.<P> <HR> <OL>
|
|
15
|
+
|
|
16
|
+
<h5>Last modified: Thu Jul 11 20:53:17 1996</h5>
|
|
17
|
+
|
|
18
|
+
<LI> <A HREF="http://www.ucab.edu.ve/">Universidad Católica Andrés Bello</A>
|
|
19
|
+
<LI> <A HREF="http://www.ucv.edu.ve/">Universidad Central de Venezuela</A>
|
|
20
|
+
<LI> <A HREF="http://ourworld.compuserve.com/homepages/jzozaya/unimet.htm">Universidad Metropolitana</A>
|
|
21
|
+
<LI> <A HREF="http://www.une.edu.ve/">Universidad Nueva Esparta</A>
|
|
22
|
+
<LI> <A HREF="http://www.usb.ve/">Universidad Simón Bolívar (USB)</A>
|
|
23
|
+
<LI> <A HREF="http://mozart.ing.ula.ve/ula.html">Universidad de Los Andes (ULA)</A>
|
|
24
|
+
<LI> <A HREF="http://www.luz.ve/">Universidad del Zulia</A>
|
|
25
|
+
</OL>
|
|
26
|
+
<A HREF="geog.html">Return to Top Level</A>
|
|
27
|
+
<HR>
|
|
28
|
+
<EM><A HREF="mailto:cdemello@mit.edu">cdemello@mit.edu, cdemello@us.oracle.com</A></EM>
|
|
29
|
+
<P>
|
|
30
|
+
Last updated:
|
|
31
|
+
|
|
32
|
+
Mon Jul 15 00:42:42 PDT 1996
|
|
33
|
+
</BODY>
|
|
34
|
+
</HTML>
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
<HTML><HEAD><TITLE>Colleges and Universities - Yugoslavia </TITLE>
|
|
2
|
+
<LINK HREF="mailto:cdemello@mit.edu">
|
|
3
|
+
</HEAD><BODY><H1><A HREF="http://www.eff.org/blueribbon.html"><IMG ALIGN="TOP" ALT="[Blue Ribbon]" SRC="blueribbon.gif"></A>Colleges and Universities - Yugoslavia </H1>
|
|
4
|
+
<HR>
|
|
5
|
+
|
|
6
|
+
(C)Copyright 1995,1996 Christina DeMello. Reproduction and distribution are
|
|
7
|
+
permissible for non-profit purposes only, but no changes are to be
|
|
8
|
+
made to this document without the author's written consent. <P>
|
|
9
|
+
|
|
10
|
+
Please send additions, corrections, and comments to <EM><A
|
|
11
|
+
HREF="mailto:cdemello@mit.edu">cdemello@mit.edu</A></EM>. Please
|
|
12
|
+
give me a few days to respond as I no longer have a
|
|
13
|
+
direct net connection at work every day. Also, this list may be
|
|
14
|
+
updated more quickly than I can respond personally to email.<P> <HR> <OL>
|
|
15
|
+
|
|
16
|
+
<h5>Last modified: Thu Jul 11 20:53:17 1996</h5>
|
|
17
|
+
|
|
18
|
+
<LI> <A HREF="http://147.91.1.5/bu/">Univerzitet u Beogradu</A>
|
|
19
|
+
</OL>
|
|
20
|
+
<A HREF="geog.html">Return to Top Level</A>
|
|
21
|
+
<HR>
|
|
22
|
+
<EM><A HREF="mailto:cdemello@mit.edu">cdemello@mit.edu, cdemello@us.oracle.com</A></EM>
|
|
23
|
+
<P>
|
|
24
|
+
Last updated:
|
|
25
|
+
|
|
26
|
+
Mon Jul 15 00:42:43 PDT 1996
|
|
27
|
+
</BODY>
|
|
28
|
+
</HTML>
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
<HTML><HEAD><TITLE>Colleges and Universities - South Africa </TITLE>
|
|
2
|
+
<LINK HREF="mailto:cdemello@mit.edu">
|
|
3
|
+
</HEAD><BODY><H1><A HREF="http://www.eff.org/blueribbon.html"><IMG ALIGN="TOP" ALT="[Blue Ribbon]" SRC="blueribbon.gif"></A>Colleges and Universities - South Africa </H1>
|
|
4
|
+
<HR>
|
|
5
|
+
|
|
6
|
+
(C)Copyright 1995,1996 Christina DeMello. Reproduction and distribution are
|
|
7
|
+
permissible for non-profit purposes only, but no changes are to be
|
|
8
|
+
made to this document without the author's written consent. <P>
|
|
9
|
+
|
|
10
|
+
Please send additions, corrections, and comments to <EM><A
|
|
11
|
+
HREF="mailto:cdemello@mit.edu">cdemello@mit.edu</A></EM>. Please
|
|
12
|
+
give me a few days to respond as I no longer have a
|
|
13
|
+
direct net connection at work every day. Also, this list may be
|
|
14
|
+
updated more quickly than I can respond personally to email.<P> <HR> <OL>
|
|
15
|
+
|
|
16
|
+
<h5>Last modified: Thu Jul 11 20:53:17 1996</h5>
|
|
17
|
+
|
|
18
|
+
<LI> <A HREF="http://lin01.global.co.za/business/bmtc/">Business Management Training College of Southern Africa</A>
|
|
19
|
+
<LI> <A HREF="http://www.puk.ac.za/">Potchefstroom University for Christian Higher Education</A>
|
|
20
|
+
<LI> <A HREF="http://www.rau.ac.za/">Rand Afrikaans University</A>
|
|
21
|
+
<LI> <A HREF="http://www.ru.ac.za/">Rhodes University</A>
|
|
22
|
+
<LI> <A HREF="http://www.trsa.ac.za/">Technikon Southern Africa</A>
|
|
23
|
+
<LI> <A HREF="http://www.uct.ac.za/">University of Cape Town</A>
|
|
24
|
+
<LI> <A HREF="http://www.udw.ac.za/">University of Durban-Westville</A>
|
|
25
|
+
<LI> <A HREF="http://www.und.ac.za/">University of Natal (Durban)</A>
|
|
26
|
+
<LI> <A HREF="http://www.unp.ac.za/">University of Natal, Pietermaritzburg</A>
|
|
27
|
+
<LI> <A HREF="http://www.upe.ac.za/">University of Port Elizabeth</A>
|
|
28
|
+
<LI> <A HREF="http://www.up.ac.za/">University of Pretoria</A>
|
|
29
|
+
<LI> <A HREF="http://www.unisa.ac.za/">University of South Africa</A>
|
|
30
|
+
<LI> <A HREF="http://www.sun.ac.za">University of Stellenbosch</A>
|
|
31
|
+
<LI> <A HREF="http://www.utr.ac.za/">University of Transkei</A>
|
|
32
|
+
<LI> <A HREF="http://www.wits.ac.za/">University of Witwatersrand</A>
|
|
33
|
+
<LI> <A HREF="http://www.uzulu.ac.za/">University of Zululand</A>
|
|
34
|
+
<LI> <A HREF="http://www.uovs.ac.za/">University of the Orange Free State</A>
|
|
35
|
+
<LI> <A HREF="http://www.uwc.ac.za/">University of the Western Cape</A>
|
|
36
|
+
<LI> <A HREF="http://www.vista.ac.za/">Vista University</A>
|
|
37
|
+
</OL>
|
|
38
|
+
<A HREF="geog.html">Return to Top Level</A>
|
|
39
|
+
<HR>
|
|
40
|
+
<EM><A HREF="mailto:cdemello@mit.edu">cdemello@mit.edu, cdemello@us.oracle.com</A></EM>
|
|
41
|
+
<P>
|
|
42
|
+
Last updated:
|
|
43
|
+
|
|
44
|
+
Mon Jul 15 00:42:44 PDT 1996
|
|
45
|
+
</BODY>
|
|
46
|
+
</HTML>
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
<HTML><HEAD><TITLE>Colleges and Universities - Zambia </TITLE>
|
|
2
|
+
<LINK HREF="mailto:cdemello@mit.edu">
|
|
3
|
+
</HEAD><BODY><H1><A HREF="http://www.eff.org/blueribbon.html"><IMG ALIGN="TOP" ALT="[Blue Ribbon]" SRC="blueribbon.gif"></A>Colleges and Universities - Zambia </H1>
|
|
4
|
+
<HR>
|
|
5
|
+
|
|
6
|
+
(C)Copyright 1995,1996 Christina DeMello. Reproduction and distribution are
|
|
7
|
+
permissible for non-profit purposes only, but no changes are to be
|
|
8
|
+
made to this document without the author's written consent. <P>
|
|
9
|
+
|
|
10
|
+
Please send additions, corrections, and comments to <EM><A
|
|
11
|
+
HREF="mailto:cdemello@mit.edu">cdemello@mit.edu</A></EM>. Please
|
|
12
|
+
give me a few days to respond as I no longer have a
|
|
13
|
+
direct net connection at work every day. Also, this list may be
|
|
14
|
+
updated more quickly than I can respond personally to email.<P> <HR> <OL>
|
|
15
|
+
|
|
16
|
+
<h5>Last modified: Thu Jul 11 20:53:17 1996</h5>
|
|
17
|
+
|
|
18
|
+
<LI> <A HREF="http://www.zamnet.zm/unza/unza.html">University of Zambia</A>
|
|
19
|
+
</OL>
|
|
20
|
+
<A HREF="geog.html">Return to Top Level</A>
|
|
21
|
+
<HR>
|
|
22
|
+
<EM><A HREF="mailto:cdemello@mit.edu">cdemello@mit.edu, cdemello@us.oracle.com</A></EM>
|
|
23
|
+
<P>
|
|
24
|
+
Last updated:
|
|
25
|
+
|
|
26
|
+
Mon Jul 15 00:42:45 PDT 1996
|
|
27
|
+
</BODY>
|
|
28
|
+
</HTML>
|