RubyGems - biblicit - Versions diffs - 2.2.1 → 2.2.2 - Mend

biblicit 2.2.1 → 2.2.2

Files changed (17) hide show

data/README.md +2 -2
data/lib/biblicit/version.rb +1 -1
data/parscit/bin/citeExtract.pl +4 -3
data/parscit/bin/parsHed/redo.parsHed.pl +2 -1
data/parscit/bin/phOutput2xml.pl +2 -1
data/parscit/bin/redo.parsCit.pl +2 -1
data/parscit/bin/sectLabel/genericSectExtract.rb +1 -1
data/parscit/bin/tr2crfpp.pl +2 -1
data/parscit/lib/HeaderParse/Config/API_Config.pm +1 -1
data/parscit/lib/ParsCit/Tr2crfpp.pm +2 -2
data/parscit/lib/ParsHed/Tr2crfpp.pm +3 -3
data/parscit/lib/ParsHed/Tr2crfpp_token.pm +2 -2
data/parscit/lib/SectLabel/AAMatching.pm +2 -1
data/parscit/lib/SectLabel/Controller.pm +4 -2
data/parscit/lib/SectLabel/Tr2crfpp.pm +3 -2
metadata +4 -5
data/parscit/doc/index.html +0 -692

data/README.md CHANGED Viewed

@@ -13,7 +13,7 @@ Note: The version is 2.x, but really should be 0.2.x.
   result = Biblicit::Extractor.extract(content: "a string containing the content of a PDF file")
   # Extract metadata from a file using all available tools
-  result = Biblicit::Extractor.extract(file: "myfile.pdf", tools: [:citeseer, :parshed, :cb2bib], remote: true, token: false)
+  result = Biblicit::Extractor.extract(file: "myfile.pdf", tools: [:parshed, :cb2bib], remote: true, token: false)
   # See reference information for "myfile.pdf"
   result[:citeseer][:title]
@@ -139,7 +139,7 @@ You can specify where you have installed CRF++ by setting the CRFPP_HOME environ
     sudo apt-add-repository 'deb http://cl.naist.jp/~eric-n/ubuntu-nlp oneiric all'
     sudo apt-get update
-    sudo apt-get install libcrf++ crf++
+    sudo apt-get install libcrf++-dev crf++
 ##### On OS X with Homebrew

data/lib/biblicit/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 module Biblicit
-  VERSION = '2.2.1'
+  VERSION = '2.2.2'
 end

data/parscit/bin/citeExtract.pl CHANGED Viewed

@@ -53,8 +53,8 @@ $tmpfile	.= $$ . time;
 # Untaint tmpfile variable
 if ($tmpfile =~ /^([-\@\w.]+)$/) { $tmpfile = $1; }
-$tmpfile		= "/tmp/" . $tmpfile;
+my $tmpdir = $ENV{'PARSCIT_TMPDIR'} || "/tmp";
+$tmpfile		= "$tmpdir/$tmpfile";
 $0				=~ /([^\/]+)$/;
 my $progname	= $1;
@@ -379,7 +379,8 @@ sub BiblioScript
 	my ($types, $pc_xml, $outfile) = @_;
 	my @export_types	= @{ $types };
-	my $tmp_dir			= "/tmp/" . NewTmpFile();
+  my $base_tmp_dir = $ENV{'PARSCIT_TMPDIR'} || "/tmp";
+	my $tmp_dir			= $base_tmp_dir . "/" . NewTmpFile();
 	system("mkdir -p $tmp_dir");
 	# Write extract_citation output to a tmp file

data/parscit/bin/parsHed/redo.parsHed.pl CHANGED Viewed

@@ -10,7 +10,8 @@ use FindBin;
 my $tmpfile .= $0; $tmpfile =~ s/[\.\/]//g;
 $tmpfile .= $$ . time;
 if ($tmpfile =~ /^([-\@\w.]+)$/) { $tmpfile = $1; }		      # untaint tmpfile variable
-$tmpfile = "/tmp/" . $tmpfile;
+my $tmpdir = $ENV{'PARSCIT_TMPDIR'} || "/tmp";
+$tmpfile = "$tmpdir/$tmpfile";
 $0 =~ /([^\/]+)$/; my $progname = $1;
 my $outputVersion = "1.0";

data/parscit/bin/phOutput2xml.pl CHANGED Viewed

@@ -29,7 +29,8 @@ use strict 'vars';
 my $tmpfile .= $0; $tmpfile =~ s/[\.\/]//g;
 $tmpfile .= $$ . time;
 if ($tmpfile =~ /^([-\@\w.]+)$/) { $tmpfile = $1; }		      # untaint tmpfile variable
-$tmpfile = "/tmp/" . $tmpfile;
+my $tmpdir = $ENV{'PARSCIT_TMPDIR'} || "/tmp";
+$tmpfile = "$tmpdir/$tmpfile";
 $0 =~ /([^\/]+)$/; my $progname = $1;
 my $outputVersion = "1.0";
 ### END user customizable section

data/parscit/bin/redo.parsCit.pl CHANGED Viewed

@@ -5,7 +5,8 @@
 my $tmpfile .= $0; $tmpfile =~ s/[\.\/]//g;
 $tmpfile .= $$ . time;
 if ($tmpfile =~ /^([-\@\w.]+)$/) { $tmpfile = $1; }		      # untaint tmpfile variable
-$tmpfile = "/tmp/" . $tmpfile;
+my $tmpdir = $ENV{'PARSCIT_TMPDIR'} || "/tmp";
+$tmpfile = "$tmpdir/$tmpfile";
 $0 =~ /([^\/]+)$/; my $progname = $1;
 my $outputVersion = "1.0";
 my $parscitHome = "/home/wing.nus/services/parscit/tools/";

data/parscit/bin/sectLabel/genericSectExtract.rb CHANGED Viewed

@@ -8,7 +8,7 @@ pwd = File.dirname(__FILE__)
 @CRFPP  = ENV['CRFPP_HOME'] ? "#{ENV['CRFPP_HOME']}/bin" : "#{pwd}/../../crfpp"
 @SRC    = "#{pwd}/genericSect"
 @DATA   = "#{pwd}/../../resources/sectLabel/"
-@TEST_DIR = "/tmp/"
+@TEST_DIR = ENV['PARSCIT_TMPDIR'] || "/tmp"
 require "#{@SRC}/forceUtf8"

data/parscit/bin/tr2crfpp.pl CHANGED Viewed

@@ -29,7 +29,8 @@ use FindBin;
 my $tmpfile .= $0; $tmpfile =~ s/[\.\/]//g;
 $tmpfile .= $$ . time;
 if ($tmpfile =~ /^([-\@\w.]+)$/) { $tmpfile = $1; }		      # untaint tmpfile variable
-$tmpfile = "/tmp/" . $tmpfile;
+my $tmpdir = $ENV{'PARSCIT_TMPDIR'} || "/tmp";
+$tmpfile = "$tmpdir/$tmpfile";
 $0 =~ /([^\/]+)$/; my $progname = $1;
 my $outputVersion = "1.0";
 my $dictFile = "$FindBin::Bin/../resources/parsCitDict.txt";

data/parscit/lib/HeaderParse/Config/API_Config.pm CHANGED Viewed

@@ -33,7 +33,7 @@ $Resource_Dir = "$parscitHome/resources/headerParse";
 $Database_Dir = "$Resource_Dir/database/";
 $Data_Dir = "$Resource_Dir/data/";
 $offlineD = "$Resource_Dir/models/";
-$Tmp_Dir = "$parscitHome/tmp";
+$Tmp_Dir = $ENV{'PARSCIT_TMPDIR'} || "/tmp";
 $nMinHeaderLength = 50;
 $nMaxHeaderLength = 2500;

data/parscit/lib/ParsCit/Tr2crfpp.pm CHANGED Viewed

@@ -982,8 +982,8 @@ sub BuildTmpFile
 	###
 	# Altered by Min (Thu Feb 28 13:08:59 SGT 2008)
 	###
-    return "/tmp/$tmpfile";
-    # return $tmpfile;
+    my $tmpdir = $ENV{'PARSCIT_TMPDIR'} || "/tmp";
+    return "$tmpdir/$tmpfile";
 }
 sub Fatal

data/parscit/lib/ParsHed/Tr2crfpp.pm CHANGED Viewed

@@ -329,9 +329,9 @@ sub buildTmpFile {
     if ($tmpfile =~ /^([-\@\w.]+)$/) {
 	$tmpfile = $1;
     }
-    # return $tmpfile;
-    return "/tmp/$tmpfile"; # Altered by Min (Thu Feb 28 13:08:59 SGT 2008)
+    my $tmpdir = $ENV{'PARSCIT_TMPDIR'} || "/tmp";
+    return "$tmpdir/$tmpfile"; # Altered by Min (Thu Feb 28 13:08:59 SGT 2008)
 }  # buildTmpFile

data/parscit/lib/ParsHed/Tr2crfpp_token.pm CHANGED Viewed

@@ -216,8 +216,8 @@ sub buildTmpFile {
 	$tmpfile = $1;
     }
-    # return $tmpfile;
-    return "/tmp/$tmpfile"; # Altered by Min (Thu Feb 28 13:08:59 SGT 2008)
+    my $tmpdir = $ENV{'PARSCIT_TMPDIR'} || "/tmp";
+    return "$tmpdir/$tmpfile"; # Altered by Min (Thu Feb 28 13:08:59 SGT 2008)
 }  # buildTmpFile

data/parscit/lib/SectLabel/AAMatching.pm CHANGED Viewed

@@ -1930,7 +1930,8 @@ sub BuildTmpFile
 		$tmpfile = $1;
     }
-    return "/tmp/$tmpfile"; # Altered by Min (Thu Feb 28 13:08:59 SGT 2008)
+    my $tmpdir = $ENV{'PARSCIT_TMPDIR'} || "/tmp";
+    return "$tmpdir/$tmpfile"; # Altered by Min (Thu Feb 28 13:08:59 SGT 2008)
 }
 1;

data/parscit/lib/SectLabel/Controller.pm CHANGED Viewed

@@ -212,7 +212,7 @@ sub GetGenericHeaders
 	my $num_headers = scalar(@{ $headers });
 	# Put the list of headers to file
-  	my $header_file = "/tmp/" . NewTmpFile();
+  my $header_file = NewTmpFile();
   	$generic_sect_path = UntaintPath($generic_sect_path);
@@ -328,7 +328,9 @@ sub NewTmpFile
 	chomp($tmpfile);
 	$tmpfile = UntaintPath($tmpfile);
-	return $tmpfile;
+  my $tmpdir = $ENV{'PARSCIT_TMPDIR'} || "/tmp";
+	return "$tmpdir/$tmpfile";
 }
 1;

data/parscit/lib/SectLabel/Tr2crfpp.pm CHANGED Viewed

@@ -1079,8 +1079,9 @@ sub BuildTmpFile
 	{
 		$tmpfile = $1;
     }
-    return "/tmp/$tmpfile"; # Altered by Min (Thu Feb 28 13:08:59 SGT 2008)
+    my $tmpdir = $ENV{'PARSCIT_TMPDIR'} || "/tmp";
+    return "$tmpdir/$tmpfile"; # Altered by Min (Thu Feb 28 13:08:59 SGT 2008)
 }

metadata CHANGED Viewed

@@ -2,14 +2,14 @@
 name: biblicit
 version: !ruby/object:Gem::Version
   prerelease:
-  version: 2.2.1
+  version: 2.2.2
 platform: ruby
 authors:
 - David Judd
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-05-08 00:00:00.000000000 Z
+date: 2013-05-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   prerelease: false
@@ -153,7 +153,6 @@ files:
 - parscit/bin/sectLabel/single2multi.pl
 - parscit/bin/sectLabel/tr2crfpp.pl
 - parscit/bin/tr2crfpp.pl
-- parscit/doc/index.html
 - parscit/lib/CSXUtil/SafeText.pm
 - parscit/lib/HeaderParse/API/AssembleXMLMetadata.pm
 - parscit/lib/HeaderParse/API/Function.pm
@@ -435,7 +434,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       segments:
       - 0
-      hash: 782848681955337634
+      hash: -1109935214577805230
       version: '0'
   none: false
 required_rubygems_version: !ruby/object:Gem::Requirement
@@ -444,7 +443,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       segments:
       - 0
-      hash: 782848681955337634
+      hash: -1109935214577805230
       version: '0'
   none: false
 requirements:

data/parscit/doc/index.html DELETED Viewed

@@ -1,692 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 3.0//EN">
-<HTML><HEAD>
-<META HTTP-EQUIV="Keywords" NAME="Keywords" CONTENT="citations, citation parser, references, reference parser, bibliography parser, reference string parser, bibtex, citation, citation extraction, logical structure, document logical structure">
-<LINK REL="stylesheet" HREF="parsCit.css"><TITLE>ParsCit: An open-source CRF Reference String and Logical Document Structure Parsing Package</TITLE>
-<script type="text/javascript">
-function toggleLayer( whichLayer ) {
-  var elem, vis;
-  if( document.getElementById ) // this is the way the standards work
-    elem = document.getElementById( whichLayer );
-  else if( document.all ) // this is the way old msie versions work
-      elem = document.all[whichLayer];
-  else if( document.layers ) // this is the way nn4 works
-    elem = document.layers[whichLayer];
-  vis = elem.style;
-  // if the style.display value is blank we try to figure it out here
-  if(vis.display==''&&elem.offsetWidth!=undefined&&elem.offsetHeight!=undefined)
-    vis.display = (elem.offsetWidth!=0&&elem.offsetHeight!=0)?'block':'none';
-  vis.display = (vis.display==''||vis.display=='block')?'none':'block';
-}
-</script>
-</HEAD><BODY BGCOLOR="#FFFFFF">
-<div id="leftcontent">
-[&nbsp;<A href="http://wing.comp.nus.edu.sg/">WING homepage</A>&nbsp;]<br/>
-[&nbsp;<A href="http://wing.comp.nus.edu.sg/portal/web-services.html">WING web services</A>&nbsp;]<br/>
-<BR/>
-[&nbsp;<A href="#d">Download</a>&nbsp;]<br/>
-[&nbsp;<A href="#ws">Web Service</a>&nbsp;]</br/>
-[&nbsp;<A href="#wd">Web Demo</a>&nbsp;]<br/>
-[&nbsp;<A href="#p">Publications</a>&nbsp;]<br/>
-[&nbsp;<A href="#gsiso">Input and Output</a>&nbsp;]<br/>
-[&nbsp;<A href="#gm">Group Members</a>&nbsp;]<br/>
-[&nbsp;<A href="#faq">FAQ</a>&nbsp;]<br/>
-[&nbsp;<A href="#t">Troubleshooting</a>&nbsp;]<br/>
-<br/>
-<script type="text/javascript"
-src="http://feedjit.com/serve/?vv=932&tft=3&dd=0&wid=0804fba767d140cd&pid=0&proid=0&bc=FFFFFF&tc=000000&brd1=012B6B&lnk=135D9E&hc=FFFFFF&hfc=2853A8&btn=C99700&ww=200&wne=10&wh=WING+Live+Traffic+Feed&hl=0&hlnks=0&hfce=0&srefs=1&hbars=0"></script><noscript><a
-href="http://feedjit.com/">Feedjit Live Blog Stats</a></noscript>
-</div>
-<div id="centercontent">
-<IMG ALIGN="LEFT" SRC="parsCit.png" WIDTH="200px" ALT="Picture of ParsCit Swami">
-<CENTER><H1>ParsCit: An open-source CRF Reference String and Logical Document Structure Parsing Package</H1></CENTER>
-<P>This is the home page of the ParsCit project, which performs two
-tasks: 1) reference string parsing, sometimes also called citation
-parsing or citation extraction, and 2) logical structure parsing of
-scientific documents.  It is architected as a supervised machine
-learning procedure that uses Conditional Random Fields as its learning
-mechanism.  You can download the code below, parse strings online, or
-send batch jobs to our web service.  The code contains both the
-training data, feature generator and shell scripts to connect the
-system to a web service (used on this web site).</P>
-<P>Some definitions (thanks to Robert Dale for Citations and Reference
-Strings):
-<DL>
-  <DT>Reference String:</DT><DD>A text string in the bibliography or
-  reference section of a work, usually at the end of the document that
-  refers to a unique document.  Usually occurs with other reference
-  strings that point to other documents. May also appear as
-  footnotes.</DD>
-  <DT>Citation:</DT><DD>A text string (usually explicit) in the
-  document body that points to a corresponding reference string at the
-  end of the document.  Several citations may co-refer to a single
-  reference string.</DD>
-  <DT>Document Logical Structure:</DT><DD> A hierarchy of logical
-  components, for example, titles, authors, affiliations, abstracts,
-  sections, etc., according to (Mao, Rosenfeld &amp;
-  Kanungo,2003). Our logical structure is more comprehensive,
-  comprising not only header metadata and references, but also the
-  logical structure of the internals of the document -- sections,
-  subsections, figures, tables, equations, footnotes and
-  captions. </DD>
-</DL>
-<P>This project deals with the problem of parsing the reference
-strings and parsing the logical structure of a document.  The first
-task is handled by a module with the project namesake, ParsCit, and
-the second task by a separate module SectLabel.
-</P>
-<br clear="all"/>
-<!-- License ---------------------------------------------------------------------- -->
-<A NAME="l"></A><H2>License</H2>
-<P>This software is licensed under the <A
-HREF="http://www.gnu.org/copyleft/lesser.html">Lesser GNU Public
-License</A> (LGPL), which means you are free to use it for any
-purpose, including embedding in commercial products.  </P>
-<br clear="all" />
-<!-- Download ---------------------------------------------------------------------- -->
-<A NAME="d"></A><H2>Download</H2>
-<P>You can download the open-source code for ParsCit here.  The source requires you to re-compile the CRFPP source code
-and assumes that perl is installed on your system and can be invoked
-using <CODE>perl</CODE> (must be in your path).
-</P>
-<ul>
-  <li> Current version <A HREF="parscit-110505b.zip">110505b</A>: Added XML::Twig for XML processing. ParsCit now uses input provided by SectLabel. See <A HREF="CHANGELOG.txt"> CHANGELOG.txt </A>.<BR/>
-  The (partially ported) <A HREF="parscit-110505b-win.zip">Windows</A> version is here (provided by Yumichika).  See the <A HREF="CHANGES%20FOR%20WINDOWS.txt">CHANGES FOR WINDOWS.txt</A>
-<BR/>
-  <BR/>
-  We have also pushed a copy of the ParsCit current distribution into <A HREF="http://www.github.com/knmnyn/parscit">GitHub:knmnyn/parscit</A>.
-  The Windows version has also been pushed to <A HREF="http://www.github.com/wing-nus/parscit">GitHub:wing-nus/parscit</A>.
-  While we'll strive to keep the GitHub version as updated as possible, the versions on this page will remain the most authoritative for major updates.
-  <BR/>
-  <li> Other versions: <BR/>
-<A HREF="parscit-101101.zip">101101</A>: Incorporated <A HREF="http://github.com/mromanello/BiblioScript">BiblioScript</A> and <A HREF=http://www.scripps.edu/~cdputnam/software/bibutils>BibUtils</A> software. See CHANGELOG.txt; <BR/>
-<A HREF="parscit-100401.zip">100401d</A>: Added SectLabel (logical structure parsing) software from the NUS team, and Iconip training data from Cheong Chi Hong for ParsCit with new ParsCit model retrained. See CHANGELOG.txt; <BR/>
-<A HREF="parscit-090625.zip">090625b</A>: Added documentation for complete re-installation. Improved ParsHed with added line-level CRF model together and post-processing module by NUS team, WSDL file and client for service at NUS and minor bug fixes for ParsCit. See CHANGELOG.txt; <BR/>
-<A HREF="parscit-090316.zip">090316</A>: Incorporation of ParsHed (header parsing) software from the NUS team. See CHANGELOG.txt; <BR/>
-<A HREF="parscit-081201.zip">081201</A>: Bug fixes and incorporation of byte position offset from the Scienstein.org team. See CHANGELOG.txt; <BR/>
-<A HREF="parscit-080917.zip">080917</A>: Minor changes (improved models and multilingual support), see CHANGELOG.txt; <BR/>
-<A HREF="parscit-080402.zip">080402</A>: First public release.  Comes with precompiled linux binaries for CRF++; <BR/>
-<A HREF="parscit-080310.tgz">080310</A>: Beta release.
-  <li><A HREF="http://crfpp.sourceforge.net">CRF++</A>: A conditional random fields toolkit that you may need to install, if the compiled one does not work for you.  We recommend that you use version 0.51. </ul>
-<!-- Web Service ---------------------------------------------------------------------- -->
-<A name="ws"></a><H2>Web Service</h2>
-<P>More NLP services are now being made available on the web.
-Following this trend you can send your plain text citations to us via
-our web service.  We will parse these for you free of charge (as and
-when time and processing power allows, these processes are done with
-lower priority).</P>
-<P CLASS="red">N.B. We keep logs of what's parsed in these demos, to
-improve the accuracy and productivity of ParsCit. If you'd like these
-to be kept private or you find you use this service a lot, why not
-install a local copy of ParsCit for yourself?  If you do, please
-let us know where you are so we acknowledge you here and can re-direct
-some traffic your way.
-</P>
-<UL>
-  <LI> <A HREF="wing.nus.wsdl">Download the WSDL file</A> for the service at NUS.
-  <LI> <A HREF="ParsCitClientWSDL.rb">Download the sample ruby client
-       that uses the WSDL file</A> to dynamically generate the ParsCit web
-       service call to the NUS server.  Edit the file to see how to
-       execute it.
-  <LI> <A HREF="ParsCitClient.rb">Download sample ruby client code</a>
-       for the ParsCit web service at the NUS server.  To execute,
-       just point it at a local
-       text file that represents the text dump of a scholarly article
-       (such as one produced by a PDF to text converter):
-       <CODE>
-       ./ParsCitClient.rb ~/public_html/samples/E06-1050.txt
-       </CODE>
-  <LI><FORM METHOD="post" ACTION="parsCit.cgi"><INPUT TYPE="HIDDEN"
-       NAME="ping" VALUE="ping"><INPUT TYPE="SUBMIT" VALUE="Check"> whether
-       the web service is up.
-       </FORM>
-</UL>
-<!-- Web demo ----------------------------------------------------------------------- -->
-<A name="wd"></a><H2>Web-based Demonstration</H2>
-<P CLASS="red">N.B.: We keep logs of what's parsed in these demos, to
-improve the accuracy and productivity of ParsCit. If you'd like these
-to be kept private, why not install a local copy of ParsCit for
-yourself?</P>
-<P>You can also run ParsCit directly in your browser.  The form below
-submits your text input (after suitable cleaning) to the ParsCit
-service to parse the input file or strings.  <FONT COLOR="red">
-Note that if system load gets high, your demo call may not be executed.  If you want to run this program in batch, please download your own copy.</FONT>
-</P>
-<P><B>Demo #1: Parsing the header, logical structure and/or reference strings (and citation contexts) from a text file</B></P>
-<DIV STYLE="background-color:D0D0FF; padding: 1em">
-<FORM ENCTYPE="multipart/form-data" METHOD="post" ACTION="parsCit.cgi">
-<P>NB - this demo does not handle PDF input at this time.  You can use another web service or software to convert PDFs to text. </P>
-<P style="font-size:small;"><I>Internal key (if applicable):</I> <INPUT TYPE="password" SIZE="4" NAME="key"></P>
-<INPUT TYPE="text" SIZE="80" NAME="demo" value="1" style="display:none;">
-<P>Input Method 1) Enter a URL to a file on the web (e.g., <A HREF="http://wing.comp.nus.edu.sg/~wing.nus/samples/E06-1050.txt">http://wing.comp.nus.edu.sg/~wing.nus/samples/E06-1050.txt</A> or <A HREF="http://wing.comp.nus.edu.sg/~wing.nus/samples/W06-0102.txt">http://wing.comp.nus.edu.sg/~wing.nus/samples/W06-0102.txt</A>).<BR/>
-<INPUT TYPE="text" SIZE="80" NAME="urlfile">
-</P>
-<P>Input Method 2) Upload a .txt file (ASCII; UTF-8)<BR/>
-<INPUT TYPE="FILE" NAME="datafile">
-</P>
-<P>Input Method 3) Paste the whole file here:
-<br/>
-<TEXTAREA ROWS="4" COLS="80" NAME="textfile">
-</TEXTAREA>
-</P>
-<P>Parse the document using the following options
-<SELECT NAME="ParsCitOptions">
-  <OPTION SELECTED VALUE="5">all</OPTION>
-  <OPTION VALUE="1">citations</OPTION>
-  <OPTION VALUE="2">header</OPTION>
-  <OPTION VALUE="4">section</OPTION>
-</SELECT>
-</P>
-<P>Citation export formats
-  <INPUT TYPE=CHECKBOX NAME="ads1">ADS
-  <INPUT TYPE=CHECKBOX NAME="bib1" CHECKED>BIB
-  <INPUT TYPE=CHECKBOX NAME="end1">EndNote
-  <INPUT TYPE=CHECKBOX NAME="isi1">ISI
-  <INPUT TYPE=CHECKBOX NAME="ris1">RIS
-  <INPUT TYPE=CHECKBOX NAME="wordbib1">WordBib
-</P>
-<br/><CENTER><INPUT TYPE="SUBMIT" VALUE="Parse this file!"></CENTER>
-</FORM>
-</DIV>
-<P><B>Demo #2: As above but using XML input (XML must conform to Omnipage output). This demo is slow so please be patient.</B></P>
-<DIV STYLE="background-color:D0D0FF; padding: 1em">
-<FORM ENCTYPE="multipart/form-data" METHOD="post" ACTION="parsCit.cgi">
-<INPUT TYPE="text" SIZE="80" NAME="demo" value="2" style="display:none;">
-<P style="font-size:small;"><I>Internal key (if applicable):</I> <INPUT TYPE="password" SIZE="4" NAME="key"></P>
-<P>Input Method 1) Enter a URL to a file on the web (e.g., <A HREF="http://wing.comp.nus.edu.sg/~wing.nus/samples/E06-1050.xml">http://wing.comp.nus.edu.sg/~wing.nus/samples/E06-1050.xml</A> or <A HREF="http://wing.comp.nus.edu.sg/~wing.nus/samples/W06-0102.xml">http://wing.comp.nus.edu.sg/~wing.nus/samples/W06-0102.xml</A>).<BR/>
-<INPUT TYPE="text" SIZE="80" NAME="urlfile">
-</P>
-<P>Input Method 2) Upload a .xml file (ASCII; UTF-8)<BR/>
-<INPUT TYPE="FILE" NAME="datafile">
-</P>
-<P>Input Method 3) Paste the whole .xml file here:
-<br/>
-<TEXTAREA ROWS="4" COLS="80" NAME="textfile">
-</TEXTAREA>
-</P>
-<P>Input Method 4) Upload your own .pdf file (less than 50 pages & smaller than 10MB):
-<br/>
-<INPUT TYPE="FILE" NAME="pdffile">
-</P>
-<P>Parse the document using the following options
-<SELECT NAME="ParsCitOptions">
-  <OPTION SELECTED VALUE="5">all</OPTION>
-  <OPTION VALUE="1">citations</OPTION>
-  <OPTION VALUE="2">header</OPTION>
-  <OPTION VALUE="4">section</OPTION>
-</SELECT>
-</P>
-<P>Citation export formats
-  <INPUT TYPE=CHECKBOX NAME="ads2">ADS
-  <INPUT TYPE=CHECKBOX NAME="bib2" CHECKED>BIB
-  <INPUT TYPE=CHECKBOX NAME="end2">EndNote
-  <INPUT TYPE=CHECKBOX NAME="isi2">ISI
-  <INPUT TYPE=CHECKBOX NAME="ris2">RIS
-  <INPUT TYPE=CHECKBOX NAME="wordbib2">WordBib
-</P>
-<br/><CENTER><INPUT TYPE="SUBMIT" VALUE="Parse this file!"></CENTER>
-</FORM>
-</DIV>
-<!--
-<P><B>Demo #2b: OCR a PDF file using Omnipage (less than 50 pages & smaller than 10MB).</B></P>
-<DIV STYLE="background-color:D0D0FF; padding: 1em">
-<FORM ENCTYPE="multipart/form-data" METHOD="post" ACTION="upload.cgi">
-<P style="font-size:small;"><I>Internal key (if applicable):</I> <INPUT TYPE="password" SIZE="4" NAME="key"></P>
-<P>File to OCR (PDF only): <INPUT TYPE="FILE" NAME="content"></P>
-<br/><CENTER><INPUT TYPE="SUBMIT" VALUE="OCR this file!"></CENTER>
-</FORM>
-</DIV>
--->
-<P><B>Demo #3: Parsing individual reference strings only (just <CODE>extract_citations</CODE>)</B></P>
-<DIV STYLE="background-color:D0D0FF; padding: 1em">
-<FORM ENCTYPE="multipart/form-data" METHOD="post" ACTION="parsCit.cgi">
-<INPUT TYPE="text" SIZE="80" NAME="demo" value="3" style="display:none;">
-<P style="font-size:small;"><I>Internal key (if applicable):</I> <INPUT TYPE="password" SIZE="4" NAME="key"></P>
-<P>Input Method 1) Enter a URL to a file on the web in the correct format (each line should be a separate citation; e.g., <A
-HREF="http://wing.comp.nus.edu.sg/~wing.nus/samples/E06-1050.cite">http://wing.comp.nus.edu.sg/~wing.nus/samples/E06-1050.cite</A> or <A
-HREF="http://wing.comp.nus.edu.sg/~wing.nus/samples/W06-0102.cite">http://wing.comp.nus.edu.sg/~wing.nus/samples/W06-0102.cite</A>).
-<INPUT TYPE="text" SIZE="80" NAME="urllines">
-</P>
-<P>Input Method 2) Upload a file (again, each line should be a separate citation)<BR/>
-<INPUT TYPE="FILE" NAME="datalines">
-</P>
-<P>Input Method 3) Enter a list of plain text citations (again, one per line):<BR/>
-<TEXTAREA ROWS="4" COLS="80" NAME="textlines">Isaac G. Councill, C. Lee Giles, Min-Yen Kan. (2008) ParsCit:   An open-source CRF reference string parsing package.  To appear in the proceedings of the Language Resources and Evaluation Conference (LREC 08), Marrakesh, Morrocco, May.
-</TEXTAREA>
-</P>
-<P>Citation export formats
-  <INPUT TYPE=CHECKBOX NAME="ads3">ADS
-  <INPUT TYPE=CHECKBOX NAME="bib3" CHECKED>BIB
-  <INPUT TYPE=CHECKBOX NAME="end3">EndNote
-  <INPUT TYPE=CHECKBOX NAME="isi3">ISI
-  <INPUT TYPE=CHECKBOX NAME="ris3">RIS
-  <INPUT TYPE=CHECKBOX NAME="wordbib3">WordBib
-</P>
-<br/><CENTER><INPUT TYPE="SUBMIT" VALUE="Parse these lines!"></CENTER>
-</FORM>
-</DIV>
-<!-- Publications  ---------------------------------------------------------------------- -->
-<A name="p"></a><H2>Publications</H2>
-<P><B>Journal Papers:</B>
-<UL>
-  <LI> Minh-Thang Luong, Thuy Dung Nguyen and Min-Yen Kan (forthcoming)
-       <U>Logical Structure Recovery in Scholarly Articles with Rich
-       Document Features</U>.  Forthcoming in the International
-       Journal of Digital Library Systems.  <BR/>
-       [ <A HREF="ijdls-SectLabel.pdf">pre-print .pdf</A> ]
-</UL>
-<P><B>International Refereed Conference Publications:</B>
-<UL>
-  <LI> Isaac G. Councill, C. Lee Giles, Min-Yen Kan. (2008)
-       <U>ParsCit: An open-source CRF reference string parsing
-       package</U>.  In Proceedings of the Language Resources and
-       Evaluation Conference (LREC 08), Marrakesh, Morocco, May.
-       <BR/> [ <A HREF="lrec08/lrec08.pdf">.pdf</A> ]
-       [ <A HREF="lrec08b.png">Poster (.png)</A> ]
-</UL>
-<P><B>Others:</B>
-<UL>
-  <LI> Yong Kiat Ng. (2004) <U>Citation Parsing Using Maximum Entropy
-       and Repairs</U>.  Undergraduate thesis.  National University of
-       Singapore.  <BR/>
-       [ <A HREF="yongKiatNgThesis.pdf">.pdf</A> ]
-</UL>
-<!-- Output ---------------------------------------------------------------------- -->
-<A name="gsiso"></a><H2>Gold Standard Input and Sample Output</H2>
-<UL>
-  <LI>Chunk tagged data for <A HREF="cora.tagged.txt">Cora</A>, <A
-    HREF="citeseerx.tagged.txt">CiteSeer<SUP>X</SUP></A>, <A
-    HREF="flux-cim-cs.tagged.txt">FLUX-CiM</A> and humanities (<A
-    HREF="it-humanities.tagged.txt">Italian</A>, <A
-    HREF="en-humanities.tagged.txt">English</A>, and <A
-    HREF="mixed-humanities.tagged.txt">mixed language</A>) datasets
-    (suitable for ParsCit training).  For FLUX-CiM data, please try
-    the original hosting site maintained by Eli Cortez.  Credits to
-    Matteo Romanello for contributing the humanities datasets.
-  <LI> <A HREF="iconip.tagged.txt">Chunk tagged data for some ICONIP
-    papers</A>. Contributed by Cheong Chi Hung.
-  <LI>Results of running the v080917 version of ParsCit on FLUX-CiM's
-    dataset for [ <A HREF="flux-cim-cs.out.xml">300 computer science
-    references</A> ] [ <A HREF="flux-cim-med.out.xml">2000 medical
-    references</A> ] [ <A HREF="cora.out.xml">on the CORA dataset</A>
-    ].  Note that these results are considered cheating as current
-    version has been trained on this data.
-  <LI> Tagged section data for the SectLabel module.  <BR/> [ <A
-    HREF="sectLabelXML.tagged.txt">XML Format</A> ] [ <A
-    HREF="sectLabel.tagged.txt">Plain Text Format</A> ]<BR/>
-       [ <A HREF="genericSect.tagged.txt">GenericSect training data</A> ]
-</UL>
-<!-- Group Members  ---------------------------------------------------------------------- -->
-<A name="gm"></a><H2>Group Members</H2>
-<UL>
-  <LI> <A HREF="http://www.comp.nus.edu.sg/~kanmy">Min-Yen Kan</A> - Project leader, NUS
-  <LI> <A HREF="http://www.personal.psu.edu/igc2/">Isaac G. Councill</A>, The Pennsylvania State University
-  <LI> <A HREF="http://clgiles.ist.psu.edu/">C. Lee Giles</A>, The Pennsylvania State University
-  <LI> <A HREF="http://wing.comp.nus.edu.sg/~lmthang">Minh-Thang Luong</A> - Research Assistant (alumnus), NUS
-  <LI> Yong Kiat Ng - Final year undergraduate student (graduated, 2004), NUS
-  <LI> Thuy Dung Nguyen - Research Assistant (alumnus), NUS
-  <LI> Huy Nhat Hoang Do - Research Assistant, NUS
-</UL>
-<!-- FAQ ---------------------------------------------------------------------- -->
-<A name="faq"></a><H2>FAQ</H2>
-<DL>
-  <DT>What platforms does ParsCit work on?</DT>
-  <DD>ParsCit works on all major platforms: Windows, Linux and MacOS.
-    The installation requires ruby and perl and the CRF++ embedded
-    package also requires standard UNIX utilities like sed.  You
-    should have a working knowledge of UNIX and some experience in
-    installing UNIX tools.  Due to our time constraints, we may not be
-    able to answer your particular problems with installation.  Do let us
-    know if there was something important that you had to do to get
-    your particular download and installation working; we'll
-    incorporate it into the Troubleshooting section below.</DD>
-  <DT>What is the difference between SectLabel and the previous ParsHed?</DT>
-  <DD>SectLabel is a newly-developed module that further extends
-     ParsHed in functionality. It not only classifies header metadata,
-     but analyzes full documents to output the logical structure of
-     the internals of the document -- sections, subsections, figures,
-     tables, equations, footnotes and captions. <BR/> For compatibility
-     issues, the ParsHed module is still retained in our source code
-     and command line options.  </DD>
-  <DT>How do I retrain ParsCit for a different language? I saw code in
-    lib/ParsCit/PreProcess' to find the beginning of the bibliography
-    section, and changed that but it doesn't work.</DT>
-  <DD>The current version does not depend on those regular expressions
-    anymore, they are for previous versions (e.g., v101101).  ParsCit
-    now first labels each line using the SectLabel module and
-    discovers which lines to parse references for based on the first
-    step's output.  You need to retrain SectLabel for this, by
-    providing labeled data about what class of line each line in your
-    training data is.  It's also possible to "downgrade" the current
-    version to go back to use the rule-based method for identifying
-    the reference section.</DD>
-  <DT>What is the "genericHeader" in the output of SectLabel?  What is
-  the difference between "genericSect.tagged" and "SectLabel.tagged"?</DT>
-  <DD>Generic headers, such as introduction, methodology, and
-     evaluation, represent generic purposes of different sections in a
-     scholarly article.  We map all section names to generic ones
-     (i.e., "5. Text Features" to "Methodology").  This promotes
-     comparative viewing of sections with identical purpose across
-     articles.  For the second question, actually, Generic section is
-     a component of SectLabel.  It is responsible for classifying the
-     section headers of a paper into the generic categories such as
-     Introduction, Methodology, Result, etc.  For details refer to our
-     IJDLS journal paper.
- </DD>
-  <DT>Why is there an option to input file in XML format? Which DTD
-    should it follow?</DT>
-  <DD>SectLabel is a robust logical document structure inference
-    system that can handle both rich input (produced by OCR software
-    such as font or spatial features) to boost recognition
-    performance, but still be able to perform inference on
-    impoverished input (plain text) with degraded
-    performance. Currently, the XML input must be in the form of
-    output from Nuance OmniPage (version 16)'s XML format, and hence,
-    should follow the DTD by OmniPage.  Note: The ParsCit team is not
-    affiliated with Nuance in any way nor does it endorse
-    OmniPage.</DD>
-  <DT> I need to run ParsCit but I can't get well-formed text from my
-    PDF documents.  Can you help?</DT>
-  <DD> No, we cannot help you with this.  We don't perform OCR or text
-    extraction from PDF documents.  You will have to find your own
-    source for doing the extraction or conversion.  We've found
-    Omnipage useful in our own project work (hence the possibility of
-    XML input), but we don't endorse any product.</DD>
-  <DT> The OmniPage XML doesn't seem to be well-formed.  Is that OK?</DT>
-  <DD> Yes. The sample "XML" provided in the links (for Demo 2) are
-    actual outputs for a sequence of XML pages (one XML file per
-    page).  If you use OmniPage to save an XML file for input to
-    ParsCit, make sure to save individual pages as separate files,
-    then concatenate them to send to ParsCit.  You may want to
-    download the sample links for inspection (as they are
-    concatenations of several XML files, your browser will likely
-    complain about them not being well-formed).</DD>
-  <DT> I ran Demos 1 and 2 with the default "all" settings, but sections
-    don't seem to be detected.</DT>
-  <DD> There's no problem.  The demo just hides the SectLabel output
-    by default.  Click "Show SectLabel output" to reveal it.</DD>
-  <DT> I ran ParsCit using the OmniPage XML output, but encountered malformed UTF8 character errors.</DT>
-  <DD> OmniPage normally outputs XML results in UTF-16 format; converting them into UTF-8 will solve the problem, as shown below: <BR/>
-<I>&nbsp; &nbsp; &nbsp; iconv --from-code UTF-16 --to-code UTF-8 omnipageOutput.xml > newOmnipageOutput.xml</I>
-</DD>
-</DL>
-<!-- Troubleshooting ---------------------------------------------------------------------- -->
-<A name="t"></a><H2>Troubleshooting</H2>
-<P> A list of common problems with ParsCit.  If you find problems,
-email the lead developer at &lt;kanmy@comp.nus.edu.sg&gt;.  Please use
-the subject "[ParsCit]" to ensure that it reaches our attention.  If
-you have hand-corrected tagged data that you don't mind providing us,
-we can use that to further improve ParsCit's extracting capabilities.
-Nevertheless, there are problems with the output occasionally.  Below
-are some common problems people have encountered.
-<DL>
-  <DT>ParsCit v110505 seems to be a lot slower when used on Omnipage
-  output than the previous versions, why?</DT>
-  <DD>You are correct.  We are now using XML::Twig to do the XML
-  processing correctly, rather than do it ad-hoc ourselves, but this
-  requires constructing an exhaustive DOM tree for the Omnipage input.
-  This is the timesink that you are experiencing.</DD>
-  <DT>I'm running ParsCit on Windows but I can't get it to work, even
-  after installing a perl interpreter.  Specifically, the
-  citeExtract.pl program dies complaining that it Can't open
-  "/tmp/...." at line 175.  </DT>
-  <DD>ParsCit hasn't been fully tested on Windows at NUS, so we can't
-  vouch for whether it will run correctly. In this specific error
-  case, the "/tmp/" directory (a standard place for temporary files in
-  UNIX systems) is normally not available in Windows, and may generate
-  problems.  You may need to change the code and/or create an
-  appropriate directory for ParsCit to generate such files.</DD>
-  <DT>I tried downloading and running ParsCit but I get complaints
-    about /bin/sed and crf not being found.  Help?</DT>
-  <DD>Please read the INSTALL.txt directions.  You need to recompile
-    CRF++ for your platform.  The paths included with the install are
-    for our version; you need to recompile to have the paths point
-    correctly.</DD>
-  <DT>When running citeExtract.pl I get some errors complaining about
-  the wrong ELF class of the binaries.  How can I fix this?</DT>
-  <DD>This seems to be a problem with the compiled executables of
-  CRF++ bundled with the software.  Follow the INSTALL instructions
-  but after step 1 do:
-<P>
-<CODE>$ cp -Rf * ../../.libs<BR/>
-$ cp crf_learn ../../.libs/lt-crf_learn<BR/>
-$ cp crf_test ../../.libs/lt-crf_test<BR/>
-</CODE></DD>
-  <DT>I'm trying to install parscit v110505 using the instructions in the install file, and when I get to the point where you're supposed to recompile CRF, it exits with an error:<BR/>
-<PRE>In file included from node.h:13:0,
-                   from node.cpp:9:
-		   path.h:26:52: error: 'size_t' has not been declared
-		   make[1]: *** [node.lo] Error 1
-		   make[1]: Leaving directory `/home/agarnett/parscit/crfpp/CRF++-0.51'
-		   make: *** [all] Error 2</PRE><BR/>
-The install file mentions that this may fail the first time; unfortunately for me, it keeps failing. any help?</DT>
-  <DD>The error is from CRF++ package (not from ParsCit), there are two ways to fix it:<BR/>
-  1.  Add the line <CODE>#include&lt;iostream&gt;</CODE> to node.cpp and compile CRF++ again, or;<BR/>
-  2.  Go to <A HREF="http://crfpp.googlecode.com/svn/trunk/doc/index.html">http://crfpp.googlecode.com/svn/trunk/doc/index.html</A> and download the latest version.  The instruction is the same.  Hope this helps.</DD>
-  <DT>Issue numbers don't get extracted.</DT>
-  <DD><SPAN CLASS="red">This issue should be fixed as of the v110505
-  release.</SPAN> There is now some heuristic postprocessing code to
-  take care of breaking single or multiple tokens for issues and
-  volumes. </DD>
-  <DT>Separation of author names and publishing year fails</DT>
-  <DD> In some reference data with non-standard sequences of
-    first names and family names, e.g.
-<pre>
-  Baltes, Paul, Ursula Staudinger, Ulmann Lindenberger (1999): Lifespan
-  psychology: theory and application of intellectual functioning; in:
-  Annual Review of Psychology, 50, 471-507
-</pre>
-    ParsCit's post processing step may not detect and deal with these
-    problems reliably.  We're working to fix these too. </DD>
-  <DT>I passed ParsCit plain text output but in another, non-English
-    language.  I didn't get good results or I got empty results.  Can
-    you help? </DT>
-  <DD>Aside from English, ParsCit can handle Italian and German to a
-    limited extent, thanks to the multilingual training data.
-    However, the demo web interface uploads non-ASCII (e.g., UTF-8 or
-    UTF-16 data) as binary data and fails to execute ParsCit.
-    However, if you download a copy of ParsCit, the libraries do work
-    on such data.  Here's a <A
-    HREF="humanities.test.out.xml">sample</A>. We'd love to help make
-    a more universal model that can accommodate reference strings in
-    other languages.  If you're willing to help contribute ground
-    truth data, we'd love to hear from you!</DD>
-  <DT>How about retraining ParsCit for another language/domain?</DT>
-  <DD>You can put your supervised exemplar data into the same format
-    as tagged_references.txt found in crfpp/traindata/.  Once you have
-    this file you can generate the appropriate model for ParsCit, by
-    using three commands (assumes you are in the crfpp/traindata
-    directory):
-       <P>
-<CODE>$ ../../bin/tr2crfpp.pl tagged_references.txt > parsCit.train.data
-<BR/>
-$ ../crf_learn parsCit.template parsCit.train.data model
-<BR/>
-$ mv model ../../resources/parsCit.model
-</CODE>
-   <P>The first command creates the input feature file that crfpp uses
-     from the training data.  The second creates the model using the
-     crf_learn command.  You can then move the model file to the
-     resources/ subdirectory where it can be utilized.  To replace the
-     default model that comes with ParsCit, just execute the final
-     command.  </DD>
-  <DT> Can I retrain the package for a different set of tags if I
-     change the tagset in the training data?</DT>
-  <DD> Yes, you should be able to change the tagset to suit your
-     dataset.  You can add, eliminate and change the tagset as you
-     wish.  You need to retrain the parser system after creating your
-     tag data.  For more details on the training process, see the
-     documentation for CRF++, that is on the web at sourceforge.
-     </DD>
-  <DT>When retraining I get a "bad_alloc" error.  What gives?</DT>
-  <DD>We're not entirely sure of this.  CRF training is quite memory
-     intensive and running a large amount of training data tuples may
-     cause the embedded CRF++ package to fail.  You can try with less
-     training data, or try training on a machine with a larger amount
-     of RAM.  </DD>
-  <DT>Does the web service actually work? I can't seem to run it.</DT>
-  <DD>Occasionally our school's networking staff changes the firewall
-    settings, so the port for our group's web services may be blocked
-    (port 4000 on host wing.comp.nus.edu.sg).  If you find you can't
-    reach our services (they time out), please let us know.  </DD>
-    <DT>I get funny errors with crf_test not being useful.  How do I
-    fix this?</DT> <DD>The updated README.txt file in the 090625b
-    distribution fixes this.  Basically you need to recompile CRF++
-    0.51 and place the libraries and the executables in the proper
-    place. See the README for details.</DD>
-</DL>
-<!-- Kudos ---------------------------------------------------------------------- -->
-<H2>Kudos</H2>
-<p>ParsCit owes its continued maintenance and support to its user
-base.  Here we'd like to thank them for their help.</p>
-<P>Thanks to David Judd who reconfigured how CRF++ is located with
-respect to the main code.  Thanks to Alex Garnett in spotting more
-problems with CRF dependencies.  Thanks to George E. Raptis and Eric
-Tran for the port to Windows.  Thanks to Zhu Ying-Bo
-(yumichika@163.com) from the Language Computing and Web Mining Group,
-Institute of Computer Science and Technology of Peking University for
-the partial port to Windows.  Thanks to Yustus Oktian for questions
-about training for another language.  Thanks to Madhur Kapoor for
-asking questions about PDF conversion.  Thanks to Behrang Qasemizadeh
-for reporting problems with truncation of XML entities in XML output
-(v110505).  Thanks Tim Brody for his BiblioScript patch.  Thanks to
-David Jurgens for suggesting that we remove temporary files after runs
-(v110505).  Thanks Nikolay Nikolov for suggesting the conversion of
-OmniPage XML results from UTF-16 to UTF-8 to avoid encoding
-problems. Thanks to Matteo Romanello for the suggestion and permission
-to incorporate BiblioScript software (v101101). Many thanks to Kris
-Jack for pointing out problems with the ELF binaries and an
-appropriate fix.  Thanks to Cheong Chi Hong for fixing problems with
-Preprocess.pm (v100401) and contributing the ICONIP training data and
-XML entity problems in reference string parsing (v100401).  Thanks to
-Priya Venkateshan for pointing out sudo/root installation
-possibilities (v100401).  Thanks to Mario Lipinski for reporting
-punctuation stripping problems in reference string parsing (v100401).
-Thanks to Artemy Kolchinsky for fixes in Preprocess.pm
-(v090625). Thanks to Matteo Romanello for the humanities training
-datasets.  Thanks to Dain Kaplan for helping us fix the Preprocess.pm
-bug.  Thanks to Ayeh Bandeh-Ahmadi for correcting the warning in
-parseRefString.pl.  Thanks to Nick Friedrich and J&ouml;ran Beel of
-scienstein.org for all fixes in the v081201 version of ParsCit.  Also
-thanks to Madian Khabsa for indicating problems with installation to
-MacOS.</p>
-<P>ParsCit is used by many projects worldwide, and not just in
-experimental, research and academic places, but in commercial
-enterprises as well.  <A HREF="http://www.mendeley.com/">Mendeley</A>
-is using ParsCit to parse references from contributed papers, as is
-the <A HREF="http://citec.repec.org/">Citations in Economics
-(CitEc)</A> project.
-<!-- Related Links ---------------------------------------------------------------------- -->
-<H2>Related Links</H2>
-<P>Other, open-source citation parsers:
-<UL>
-  <LI> <A
-  HREF="http://freecite.library.brown.edu/welcome">FreeCite</A>:
-  supported by the Mellon Foundation and Brown University.  Written in
-  Ruby on Rails, with the same CRF++ backend.
-  <LI> A <A
-  HREF="http://purl.net/net/egh/hmm-citation-extractor/">Hidden Markov
-  Model Citation Extractor</A>: written by Erik Hetzner of the
-  California Digital Library.
-</UL>
-<P> Other related links.  Contact Min below to get your other related
-software listed here.  Thanks!
-<UL>
-  <LI> Perhaps you're interested in open source code for libraries?
-    If so try the <A
-    HREF="http://dewey.library.nd.edu/mailing-lists/code4lib/">CODE4LIB
-    mailing list</A>.
-  <LI> <A
-    HREF="https://wiki.birncommunity.org:8443/display/NEWBIRNCC/LATISI+-+Literature+Annotation+Tool+from+the+Information+Sciences+Institute">LATISI
-    - Literature Annotation Tool from the Information Sciences
-    Institute</A>.  A related project from ISI, using MBL instead of CRF.
-  <LI> <A HREF="http://www.scienstein.org">Scienstein.org</A>: A
-    recommendation system for papers.
-  <LI> PdfBox: An open-source package for extracting text information
-    from PDF files.  Does not deal with problems with custom font
-    encodings.
-</UL>
-<HR>
-<H5><ADDRESS><A HREF="http://www.comp.nus.edu.sg/~kanmy">Min-Yen Kan</A> &lt;<A HREF="mailto:kanmy@comp.nus.edu.sg">kanmy@comp.nus.edu.sg</A>&gt;</ADDRESS>
-Created on: Fri Dec 24 01:48:05 SGT 2004
-<!-- hhmts start -->
-| Version: 1.0
-| Last modified:
-Mon Mar  4 14:23:46 SGT 2013
-<!-- hhmts end -->
-</H5>
-</div>
-</BODY> </HTML>