biblicit 1.0 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (406)
  1. data/.gitmodules +3 -0
  2. data/Gemfile +1 -1
  3. data/README.md +125 -30
  4. data/Rakefile +22 -0
  5. data/biblicit.gemspec +9 -7
  6. data/lib/biblicit/cb2bib.rb +10 -11
  7. data/lib/biblicit/citeseer.rb +14 -26
  8. data/lib/biblicit/extractor.rb +40 -19
  9. data/lib/biblicit/parscit.rb +38 -0
  10. data/parscit/.gitignore +8 -0
  11. data/parscit/CHANGELOG +125 -0
  12. data/parscit/COPYING +674 -0
  13. data/parscit/COPYING.LESSER +165 -0
  14. data/parscit/INSTALL +105 -0
  15. data/parscit/README +97 -0
  16. data/{perl/ParsCit/README.TXT → parscit/USAGE} +25 -15
  17. data/parscit/bin/archtest.pl +31 -0
  18. data/parscit/bin/citeExtract.pl +562 -0
  19. data/parscit/bin/conlleval.pl +315 -0
  20. data/parscit/bin/headExtract.pl +40 -0
  21. data/parscit/bin/parsHed/convert2TokenLevel.pl +138 -0
  22. data/parscit/bin/parsHed/keywordGen.pl +308 -0
  23. data/parscit/bin/parsHed/parseXmlHeader.pl +141 -0
  24. data/parscit/bin/parsHed/redo.parsHed.pl +198 -0
  25. data/parscit/bin/parsHed/tr2crfpp_parsHed.pl +521 -0
  26. data/parscit/bin/parseRefStrings.pl +102 -0
  27. data/parscit/bin/phOutput2xml.pl +223 -0
  28. data/parscit/bin/redo.parsCit.pl +105 -0
  29. data/parscit/bin/sectExtract.pl +149 -0
  30. data/parscit/bin/sectLabel/README +110 -0
  31. data/parscit/bin/sectLabel/README.txt +110 -0
  32. data/parscit/bin/sectLabel/genericSect/crossValidation.rb +98 -0
  33. data/parscit/bin/sectLabel/genericSect/extractFeature.rb +104 -0
  34. data/parscit/bin/sectLabel/genericSectExtract.rb +53 -0
  35. data/parscit/bin/sectLabel/getStructureInfo.pl +156 -0
  36. data/parscit/bin/sectLabel/processOmniXML.pl +1427 -0
  37. data/parscit/bin/sectLabel/processOmniXML_new.pl +1025 -0
  38. data/parscit/bin/sectLabel/processOmniXMLv2.pl +1529 -0
  39. data/parscit/bin/sectLabel/processOmniXMLv3.pl +964 -0
  40. data/parscit/bin/sectLabel/redo.sectLabel.pl +219 -0
  41. data/parscit/bin/sectLabel/simplifyOmniXML.pl +382 -0
  42. data/parscit/bin/sectLabel/single2multi.pl +190 -0
  43. data/parscit/bin/sectLabel/tr2crfpp.pl +158 -0
  44. data/parscit/bin/tr2crfpp.pl +260 -0
  45. data/parscit/bin/xml2train.pl +193 -0
  46. data/parscit/lib/CSXUtil/SafeText.pm +130 -0
  47. data/parscit/lib/Omni/Config.pm +93 -0
  48. data/parscit/lib/Omni/Omnicell.pm +263 -0
  49. data/parscit/lib/Omni/Omnicol.pm +292 -0
  50. data/parscit/lib/Omni/Omnidd.pm +328 -0
  51. data/parscit/lib/Omni/Omnidoc.pm +153 -0
  52. data/parscit/lib/Omni/Omniframe.pm +223 -0
  53. data/parscit/lib/Omni/Omniline.pm +423 -0
  54. data/parscit/lib/Omni/Omnipage.pm +282 -0
  55. data/parscit/lib/Omni/Omnipara.pm +232 -0
  56. data/parscit/lib/Omni/Omnirun.pm +303 -0
  57. data/parscit/lib/Omni/Omnitable.pm +336 -0
  58. data/parscit/lib/Omni/Omniword.pm +162 -0
  59. data/parscit/lib/Omni/Traversal.pm +313 -0
  60. data/parscit/lib/ParsCit/.PostProcess.pm.swp +0 -0
  61. data/parscit/lib/ParsCit/Citation.pm +737 -0
  62. data/parscit/lib/ParsCit/CitationContext.pm +220 -0
  63. data/parscit/lib/ParsCit/Config.pm +35 -0
  64. data/parscit/lib/ParsCit/Controller.pm +653 -0
  65. data/parscit/lib/ParsCit/PostProcess.pm +505 -0
  66. data/parscit/lib/ParsCit/PreProcess.pm +1041 -0
  67. data/parscit/lib/ParsCit/Tr2crfpp.pm +1195 -0
  68. data/parscit/lib/ParsHed/Config.pm +49 -0
  69. data/parscit/lib/ParsHed/Controller.pm +143 -0
  70. data/parscit/lib/ParsHed/PostProcess.pm +322 -0
  71. data/parscit/lib/ParsHed/Tr2crfpp.pm +448 -0
  72. data/{perl/ParsCit/lib/ParsCit/Tr2crfpp.pm → parscit/lib/ParsHed/Tr2crfpp_token.pm} +22 -21
  73. data/parscit/lib/SectLabel/AAMatching.pm +1949 -0
  74. data/parscit/lib/SectLabel/Config.pm +88 -0
  75. data/parscit/lib/SectLabel/Controller.pm +332 -0
  76. data/parscit/lib/SectLabel/PostProcess.pm +425 -0
  77. data/parscit/lib/SectLabel/PreProcess.pm +116 -0
  78. data/parscit/lib/SectLabel/Tr2crfpp.pm +1246 -0
  79. data/parscit/resources/parsCit.model +0 -0
  80. data/parscit/resources/parsCit.split.model +0 -0
  81. data/{perl/ParsCit → parscit}/resources/parsCitDict.txt +205 -0
  82. data/parscit/resources/parsHed/bigram +10 -0
  83. data/parscit/resources/parsHed/keywords +10 -0
  84. data/parscit/resources/parsHed/parsHed.model +0 -0
  85. data/parscit/resources/parsHed/parsHed.template +178 -0
  86. data/parscit/resources/sectLabel/affiliation.model +0 -0
  87. data/parscit/resources/sectLabel/author.model +0 -0
  88. data/parscit/resources/sectLabel/funcWord +320 -0
  89. data/parscit/resources/sectLabel/genericSect.model +0 -0
  90. data/parscit/resources/sectLabel/sectLabel.config +42 -0
  91. data/parscit/resources/sectLabel/sectLabel.configXml +42 -0
  92. data/parscit/resources/sectLabel/sectLabel.model +0 -0
  93. data/sh/convert_to_text.sh +20 -0
  94. data/spec/biblicit/extractor_spec.rb +121 -0
  95. data/spec/fixtures/Review_of_Michael_Tyes_Consciousness_Revisited.docx +0 -0
  96. data/spec/fixtures/critical-infrastructures.ps +63951 -0
  97. data/spec/fixtures/txt/E06-1050.txt +867 -0
  98. data/spec/fixtures/txt/sample1.txt +902 -0
  99. data/spec/fixtures/txt/sample2.txt +394 -0
  100. data/spec/spec_helper.rb +3 -0
  101. data/{perl → svm-header-parse}/HeaderParseService/lib/HeaderParse/API/Function.pm +2 -20
  102. data/{perl → svm-header-parse}/HeaderParseService/lib/HeaderParse/API/MultiClassChunking.pm +0 -7
  103. data/{perl → svm-header-parse}/HeaderParseService/lib/HeaderParse/API/Parser.pm +0 -2
  104. data/{perl → svm-header-parse}/HeaderParseService/lib/HeaderParse/API/ParserMethods.pm +0 -7
  105. data/{perl → svm-header-parse}/HeaderParseService/lib/HeaderParse/Config/API_Config.pm +6 -1
  106. data/svm-header-parse/HeaderParseService/tmp/.gitignore +4 -0
  107. data/svm-header-parse/extract.pl +75 -0
  108. metadata +351 -317
  109. data/perl/DocFilter/lib/DocFilter/Config.pm +0 -35
  110. data/perl/DocFilter/lib/DocFilter/Filter.pm +0 -51
  111. data/perl/FileConversionService/README.TXT +0 -11
  112. data/perl/FileConversionService/converters/PDFBox/pdfbox-app-1.7.1.jar +0 -0
  113. data/perl/FileConversionService/lib/CSXUtil/SafeText.pm +0 -140
  114. data/perl/FileConversionService/lib/FileConverter/CheckSum.pm +0 -77
  115. data/perl/FileConversionService/lib/FileConverter/Compression.pm +0 -137
  116. data/perl/FileConversionService/lib/FileConverter/Config.pm +0 -57
  117. data/perl/FileConversionService/lib/FileConverter/Controller.pm +0 -191
  118. data/perl/FileConversionService/lib/FileConverter/JODConverter.pm +0 -61
  119. data/perl/FileConversionService/lib/FileConverter/PDFBox.pm +0 -69
  120. data/perl/FileConversionService/lib/FileConverter/PSConverter.pm +0 -69
  121. data/perl/FileConversionService/lib/FileConverter/PSToText.pm +0 -88
  122. data/perl/FileConversionService/lib/FileConverter/Prescript.pm +0 -68
  123. data/perl/FileConversionService/lib/FileConverter/TET.pm +0 -75
  124. data/perl/FileConversionService/lib/FileConverter/Utils.pm +0 -130
  125. data/perl/HeaderParseService/lib/CSXUtil/SafeText.pm +0 -140
  126. data/perl/HeaderParseService/resources/data/EbizHeaders.txt +0 -24330
  127. data/perl/HeaderParseService/resources/data/EbizHeaders.txt.parsed +0 -27506
  128. data/perl/HeaderParseService/resources/data/EbizHeaders.txt.parsed.old +0 -26495
  129. data/perl/HeaderParseService/resources/data/tagged_headers.txt +0 -40668
  130. data/perl/HeaderParseService/resources/data/test_header.txt +0 -31
  131. data/perl/HeaderParseService/resources/data/test_header.txt.parsed +0 -31
  132. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test1 +0 -23
  133. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test10 +0 -23
  134. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test11 +0 -23
  135. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test12 +0 -23
  136. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test13 +0 -23
  137. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test14 +0 -23
  138. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test15 +0 -23
  139. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test2 +0 -23
  140. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test3 +0 -23
  141. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test4 +0 -23
  142. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test5 +0 -23
  143. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test6 +0 -23
  144. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test7 +0 -23
  145. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test8 +0 -23
  146. data/perl/HeaderParseService/tmp/tmpVec_1156237246.08016_test9 +0 -23
  147. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test1 +0 -23
  148. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test10 +0 -23
  149. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test11 +0 -23
  150. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test12 +0 -23
  151. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test13 +0 -23
  152. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test14 +0 -23
  153. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test15 +0 -23
  154. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test2 +0 -23
  155. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test3 +0 -23
  156. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test4 +0 -23
  157. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test5 +0 -23
  158. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test6 +0 -23
  159. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test7 +0 -23
  160. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test8 +0 -23
  161. data/perl/HeaderParseService/tmp/tmpVec_914027525.276114_test9 +0 -23
  162. data/perl/ParsCit/crfpp/traindata/parsCit.template +0 -60
  163. data/perl/ParsCit/crfpp/traindata/parsCit.train.data +0 -12104
  164. data/perl/ParsCit/crfpp/traindata/tagged_references.txt +0 -500
  165. data/perl/ParsCit/lib/CSXUtil/SafeText.pm +0 -140
  166. data/perl/ParsCit/lib/ParsCit/Citation.pm +0 -462
  167. data/perl/ParsCit/lib/ParsCit/CitationContext.pm +0 -132
  168. data/perl/ParsCit/lib/ParsCit/Config.pm +0 -46
  169. data/perl/ParsCit/lib/ParsCit/Controller.pm +0 -306
  170. data/perl/ParsCit/lib/ParsCit/PostProcess.pm +0 -367
  171. data/perl/ParsCit/lib/ParsCit/PreProcess.pm +0 -333
  172. data/perl/ParsCit/resources/parsCit.model +0 -0
  173. data/perl/extract.pl +0 -199
  174. data/spec/biblicit/cb2bib_spec.rb +0 -48
  175. data/spec/biblicit/citeseer_spec.rb +0 -40
  176. /data/{perl → svm-header-parse}/HeaderParseService/README.TXT +0 -0
  177. /data/{perl/DocFilter → svm-header-parse/HeaderParseService}/lib/CSXUtil/SafeText.pm +0 -0
  178. /data/{perl → svm-header-parse}/HeaderParseService/lib/HeaderParse/API/AssembleXMLMetadata.pm +0 -0
  179. /data/{perl → svm-header-parse}/HeaderParseService/lib/HeaderParse/API/LoadInformation.pm +0 -0
  180. /data/{perl → svm-header-parse}/HeaderParseService/lib/HeaderParse/API/NamePatternMatch.pm +0 -0
  181. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/50states +0 -0
  182. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/AddrTopWords.txt +0 -0
  183. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/AffiTopWords.txt +0 -0
  184. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/AffiTopWordsAll.txt +0 -0
  185. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/ChineseSurNames.txt +0 -0
  186. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/Csurnames.bin +0 -0
  187. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/Csurnames_spec.bin +0 -0
  188. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/DomainSuffixes.txt +0 -0
  189. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/LabeledHeader +0 -0
  190. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/README +0 -0
  191. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/TrainMulClassLines +0 -0
  192. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/TrainMulClassLines1 +0 -0
  193. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/abstract.txt +0 -0
  194. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/abstractTopWords +0 -0
  195. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/addr.txt +0 -0
  196. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/affi.txt +0 -0
  197. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/affis.bin +0 -0
  198. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/all_namewords_spec.bin +0 -0
  199. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/allnamewords.bin +0 -0
  200. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/cities_US.txt +0 -0
  201. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/cities_world.txt +0 -0
  202. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/city.txt +0 -0
  203. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/cityname.txt +0 -0
  204. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/country_abbr.txt +0 -0
  205. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/countryname.txt +0 -0
  206. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/dateTopWords +0 -0
  207. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/degree.txt +0 -0
  208. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/email.txt +0 -0
  209. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/excludeWords.txt +0 -0
  210. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/female-names +0 -0
  211. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/firstNames.txt +0 -0
  212. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/firstnames.bin +0 -0
  213. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/firstnames_spec.bin +0 -0
  214. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/intro.txt +0 -0
  215. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/keyword.txt +0 -0
  216. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/keywordTopWords +0 -0
  217. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/male-names +0 -0
  218. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/middleNames.txt +0 -0
  219. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/month.txt +0 -0
  220. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/mul +0 -0
  221. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/mul.label +0 -0
  222. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/mul.label.old +0 -0
  223. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/mul.processed +0 -0
  224. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/mulAuthor +0 -0
  225. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/mulClassStat +0 -0
  226. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/nickname.txt +0 -0
  227. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/nicknames.bin +0 -0
  228. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/note.txt +0 -0
  229. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/page.txt +0 -0
  230. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/phone.txt +0 -0
  231. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/postcode.txt +0 -0
  232. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/pubnum.txt +0 -0
  233. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/statename.bin +0 -0
  234. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/statename.txt +0 -0
  235. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/states_and_abbreviations.txt +0 -0
  236. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/stopwords +0 -0
  237. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/stopwords.bin +0 -0
  238. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/surNames.txt +0 -0
  239. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/surnames.bin +0 -0
  240. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/surnames_spec.bin +0 -0
  241. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/A.html +0 -0
  242. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/B.html +0 -0
  243. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/C.html +0 -0
  244. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/D.html +0 -0
  245. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/E.html +0 -0
  246. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/F.html +0 -0
  247. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/G.html +0 -0
  248. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/H.html +0 -0
  249. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/I.html +0 -0
  250. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/J.html +0 -0
  251. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/K.html +0 -0
  252. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/L.html +0 -0
  253. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/M.html +0 -0
  254. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/N.html +0 -0
  255. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/O.html +0 -0
  256. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/P.html +0 -0
  257. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/Q.html +0 -0
  258. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/R.html +0 -0
  259. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/S.html +0 -0
  260. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/T.html +0 -0
  261. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/U.html +0 -0
  262. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/V.html +0 -0
  263. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/W.html +0 -0
  264. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/WCSelect.gif +0 -0
  265. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/X.html +0 -0
  266. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/Y.html +0 -0
  267. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/Z.html +0 -0
  268. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/ae.html +0 -0
  269. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/am.html +0 -0
  270. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/ar.html +0 -0
  271. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/at.html +0 -0
  272. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/au.html +0 -0
  273. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/bd.html +0 -0
  274. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/be.html +0 -0
  275. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/bg.html +0 -0
  276. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/bh.html +0 -0
  277. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/blueribbon.gif +0 -0
  278. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/bm.html +0 -0
  279. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/bn.html +0 -0
  280. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/br.html +0 -0
  281. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/ca.html +0 -0
  282. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/ch.html +0 -0
  283. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/cl.html +0 -0
  284. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/cn.html +0 -0
  285. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/co.html +0 -0
  286. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/cr.html +0 -0
  287. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/cy.html +0 -0
  288. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/cz.html +0 -0
  289. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/de.html +0 -0
  290. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/dean-mainlink.jpg +0 -0
  291. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/dk.html +0 -0
  292. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/ec.html +0 -0
  293. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/ee.html +0 -0
  294. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/eg.html +0 -0
  295. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/es.html +0 -0
  296. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/et.html +0 -0
  297. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/faq.html +0 -0
  298. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/fi.html +0 -0
  299. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/fj.html +0 -0
  300. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/fo.html +0 -0
  301. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/fr.html +0 -0
  302. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/geog.html +0 -0
  303. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/gr.html +0 -0
  304. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/gu.html +0 -0
  305. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/hk.html +0 -0
  306. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/hr.html +0 -0
  307. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/hu.html +0 -0
  308. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/id.html +0 -0
  309. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/ie.html +0 -0
  310. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/il.html +0 -0
  311. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/in.html +0 -0
  312. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/is.html +0 -0
  313. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/it.html +0 -0
  314. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/jm.html +0 -0
  315. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/jo.html +0 -0
  316. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/jp.html +0 -0
  317. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/kaplan.gif +0 -0
  318. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/kr.html +0 -0
  319. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/kw.html +0 -0
  320. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/lb.html +0 -0
  321. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/linkbw2.gif +0 -0
  322. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/lk.html +0 -0
  323. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/lt.html +0 -0
  324. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/lu.html +0 -0
  325. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/lv.html +0 -0
  326. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/ma.html +0 -0
  327. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/maczynski.gif +0 -0
  328. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/mirror.tar +0 -0
  329. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/mk.html +0 -0
  330. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/mo.html +0 -0
  331. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/mseawdm.gif +0 -0
  332. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/mt.html +0 -0
  333. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/mx.html +0 -0
  334. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/my.html +0 -0
  335. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/ni.html +0 -0
  336. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/nl.html +0 -0
  337. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/no.html +0 -0
  338. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/nz.html +0 -0
  339. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/pa.html +0 -0
  340. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/pe.html +0 -0
  341. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/ph.html +0 -0
  342. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/pl.html +0 -0
  343. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/pointcom.gif +0 -0
  344. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/pr.html +0 -0
  345. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/ps.html +0 -0
  346. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/pt.html +0 -0
  347. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/recognition.html +0 -0
  348. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/results.html +0 -0
  349. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/ro.html +0 -0
  350. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/ru.html +0 -0
  351. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/sd.html +0 -0
  352. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/se.html +0 -0
  353. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/sg.html +0 -0
  354. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/si.html +0 -0
  355. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/sk.html +0 -0
  356. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/th.html +0 -0
  357. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/tr.html +0 -0
  358. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/tw.html +0 -0
  359. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/ua.html +0 -0
  360. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/uk.html +0 -0
  361. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/univ-full.html +0 -0
  362. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/univ.html +0 -0
  363. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/uy.html +0 -0
  364. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/ve.html +0 -0
  365. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/yu.html +0 -0
  366. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/za.html +0 -0
  367. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list/zm.html +0 -0
  368. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/university_list.txt +0 -0
  369. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/url.txt +0 -0
  370. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/webTopWords +0 -0
  371. /data/{perl → svm-header-parse}/HeaderParseService/resources/database/words +0 -0
  372. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/10ContextModelfold1 +0 -0
  373. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/10Modelfold1 +0 -0
  374. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/11ContextModelfold1 +0 -0
  375. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/11Modelfold1 +0 -0
  376. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/12ContextModelfold1 +0 -0
  377. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/12Modelfold1 +0 -0
  378. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/13ContextModelfold1 +0 -0
  379. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/13Modelfold1 +0 -0
  380. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/14ContextModelfold1 +0 -0
  381. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/14Modelfold1 +0 -0
  382. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/15ContextModelfold1 +0 -0
  383. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/15Modelfold1 +0 -0
  384. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/1ContextModelfold1 +0 -0
  385. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/1Modelfold1 +0 -0
  386. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/2ContextModelfold1 +0 -0
  387. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/2Modelfold1 +0 -0
  388. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/3ContextModelfold1 +0 -0
  389. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/3Modelfold1 +0 -0
  390. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/4ContextModelfold1 +0 -0
  391. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/4Modelfold1 +0 -0
  392. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/5ContextModelfold1 +0 -0
  393. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/5Modelfold1 +0 -0
  394. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/6ContextModelfold1 +0 -0
  395. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/6Modelfold1 +0 -0
  396. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/7ContextModelfold1 +0 -0
  397. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/7Modelfold1 +0 -0
  398. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/8ContextModelfold1 +0 -0
  399. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/8Modelfold1 +0 -0
  400. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/9ContextModelfold1 +0 -0
  401. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/9Modelfold1 +0 -0
  402. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/NameSpaceModel +0 -0
  403. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/NameSpaceTrainF +0 -0
  404. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/WrapperBaseFeaDict +0 -0
  405. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/WrapperContextFeaDict +0 -0
  406. /data/{perl → svm-header-parse}/HeaderParseService/resources/models/WrapperSpaceAuthorFeaDict +0 -0
@@ -0,0 +1,394 @@
1
+ Explanation-Based Learning of
2
+ Indirect Speech Act Interpretation Rules
3
+ David Schulenburg (schulenb@ics.uci.edu)
4
+ Michael J. Pazzani (pazzani@ics.uci.edu)
5
+ Technical Report -- 89 -- 11
6
+ 10 May 1989
7
+ Department of Information & Computer Science
8
+ University of California, Irvine, CA 92717 USA
9
+ copyright © 1989 University of California, Irvine
10
+ Abstract
11
+ We describe an approach to deriving efficient rules for interpreting the intended meaning
12
+ of indirect speech acts. We have constructed a system called sally that starts with a few,
13
+ very general principles for understanding the intention of the speaker of an utterance. After
14
+ inferring the intended meaning of a particular utterance, sally creates a specialized rule
15
+ to understand directly similar utterances in the future.
16
+
17
+ Introduction: Indirect Speech Acts
18
+
19
+ Responding appropriately to a question often requires the listener to understand the intention
20
+ of the speaker. For example, consider the following simple question:
21
+ Q: Do you have a match?
22
+ Taken literally, this question is a request for information. However, in most contexts, this
23
+ question should be interpreted as a request for the listener to give the speaker a match. This
24
+ is a kind of speech act (Austin, 1962) called an indirect speech act (Searle, 1975), in which the
25
+ intent of the speaker differs from the direct, literal meaning of the speaker's utterance. For a
26
+ computer to take part in a conversation, it is essential that it have the ability to understand
27
+ indirect speech acts. An important part of this capability is to gain an understanding of
28
+ the class of situations in which the indirect interpretation should be preferred to the direct
29
+ interpretation. For example, a slight variant of the above question is typically interpreted
30
+ differently:
31
+ Q: Do you have a BMW?
32
+ Two approaches to the interpretation of indirect speech acts have been proposed in computational
33
+ linguistics. One approach, typified by qualm (Lehnert, 1979), makes use of a large
34
+ number of fairly specific, knowledge-intensive interpretation rules. For example, qualm
35
+
36
+ contains one rule that interprets a question to verify if the listener possesses an object as a
37
+ request for the listener to give the speaker the object, if the object is small and inexpensive.
38
+ The primary advantage of the knowledge-intensive approach is that it is efficient. A discrimination
39
+ net that indexes the interpretation rules directs the search for an interpretation.
40
+ There are several disadvantages with the knowledge-intensive approach as implemented in
41
+
42
+ qualm. First, it is difficult if not impossible, to encode an exhaustive set of rules that would
43
+ perform well on a large variety of examples. Second, the knowledge-intensive approach does
44
+ not capture any of the generalities among interpretation rules. A wide variety of knowledge-intensive
45
+ interpretation rules are specialized forms of a general rule: one interpretation of
46
+ a question to verify that a precondition of a plan is true is that the speaker wants the
47
+ listener to execute the plan. Finally, as a cognitive model, the approach does not specify
48
+ how the interpretation rules might be acquired or extended as new plans are learned. The
49
+ interpretation of an indirect speech act is a function of the plans that the speaker believes
50
+ the listener is capable of executing (or understanding) (Perrault and Allen, 1980). When
51
+ an additional plan is acquired, it may be necessary for the knowledge-intensive approach to
52
+ acquire additional interpretation rules.
53
+ The alternative approach to finding the intended meaning of an indirect speech act is to
54
+ have a small set of general rules that a listener may use to infer the speaker's plan from
55
+ the utterance (Allen and Perrault, 1980; Cohen and Perrault, 1979). This approach takes
56
+ advantage of planning formalisms (Wilensky, 1983; Fikes, 1971) to represent the content
57
+ of a conversation (Grosz and Sidner, 1986; Litman and Allen, 1987). Unlike knowledge-
58
+
59
+
60
+ 2
61
+
62
+ intensive rules, these general rules can be applied to a variety of examples since the rules
63
+ operate on a specification of the speaker's or listener's plans. However, there are also several
64
+ disadvantages to this approach. First, the search for an interpretation can be inefficient.
65
+ Second, as a cognitive model, it is not clear that a human listener goes through the long
66
+ inference process that is necessary to arrive at the interpretation. For example, Searle (1975)
67
+ has said: "In normal conversation, of course, no one would consciously go through the steps
68
+ involved in this reasoning." Allen and Perrault (1980) have made similar statements that do
69
+ not leave open the possibility that people unconsciously go through a long inference chain:
70
+ Note that, in actual fact, people probably use much more specialized knowledge
71
+ to infer the plans of others, thereby bypassing many of the particular inferences
72
+ we suggest. Our approach so far, has been to specify a minimal set of reasoning
73
+ tools that can account for the behavior observed.
74
+ Psycholinguistic studies have shown that in many circumstances, it takes no longer for a
75
+ person to recognize an indirect speech act than to find the direct meaning of an utterance.
76
+ For example, in one experiment (Gibbs, 1983), subjects found it no more difficult to find
77
+ the indirect interpretation of a request such as "Can't you be friendly?" than the literal
78
+ interpretation.
79
+ The approach that we take is a hybrid between the specific, knowledge-intensive approach
80
+ and the general, plan-based approach. In particular, we make use of general plan-based
81
+ rules to interpret novel (to the system) utterances. However, once an interpretation has
82
+ been found, we derive a knowledge-intensive rule to interpret directly "similar" utterances in
83
+ the future. The knowledge-intensive rule is created by explanation-based learning techniques
84
+ (Mitchell, Kedar-Cabelli, and Keller, 1986; DeJong and Mooney, 1986). The "similar" utterances
85
+ are those that share the features that the plan-based analysis needed to check to
86
+ infer the interpretation of the indirect speech act.
87
+
88
+ Explanation-based learning
89
+
90
+ Explanation-based learning (EBL) is a learning method which analytically generalizes an
91
+ example. EBL systems share a common approach to generalization. First, an example
92
+ problem is solved producing an explanation (occasionally called a justification, or a proof)
93
+ that indicates what information (e.g., features of the example and inference rules) was needed
94
+ to arrive at a solution. Next, the example is generalized by retaining only those features of the
95
+ example which were necessary to produce the explanation. This generalization characterizes
96
+ the class of problems that will have the same solution for the same reason as the training
97
+ example. EBL explicates (or operationalizes (Keller, 1987)) information that is implicitly
98
+ represented in a system. For example, aces (Pazzani, 1987) is a system that learns diagnosis
99
+ heuristics (i.e., efficient heuristics that associate faults with symptoms) from a functional
100
+ device description. In this work, we are using a modified version of the eggs (Mooney and
101
+ Bennett, 1986) explanation-based learning algorithm to explicate conditions under which an
102
+ indirect interpretation of a speech act can be inferred.
103
+
104
+ 3
105
+ If the effect of act is e [Action-Effect Rule]
106
+ and actor wants e
107
+ then actor wants act.
108
+ If actor1 wants actor2 to want to do act [Want-Action Rule]
109
+ then actor1 wants actor2 to do act
110
+ A precondition for actor atransing object [give requires have]
111
+ is that actor possess object.
112
+ actor1 will atrans (cheap) object to actor2
113
+
114
+ if actor1 and actor2 are friends.
115
+
116
+ Figure 1. Speech act interpretation rules
117
+
118
+ 1
119
+ Learning to interpret indirect speech acts
120
+
121
+ We illustrate the process that sally goes through to learn a rule to interpret directly indirect
122
+ speech acts with an example. Consider again the request: "Do you have a match?" The
123
+ surface speech act here is a verification of possession of a match. However, in most contexts,
124
+ the intent of the speaker is not to ask for a verification. Rather, the speaker is requesting
125
+ some action of the hearer, e.g., to give the match to the speaker. The ATRANS-Request
126
+ Conversion rule (Lehnert, 1978) states that given a verification request of a possession state
127
+ of some object which has little value, a possible target interpretation is a request of the hearer
128
+ to give the speaker that object. We address here the issue of learning this rule. We can trace
129
+ through the understanding cycle used to generate the ATRANS interpretation using the
130
+ method of Allen and Perrault (1980). sally makes use of backward chaining inference rules
131
+ for inferring the speaker's intentions, and for indicating the effects and preconditions of plans.
132
+ Figure 1 illustrates four of the rules that are used in the following example.
133
+
134
+ 2
135
+ The initial representation of the surface speech act is:
136
+
137
+ 3
138
+ HBSW (S-REQUEST (S, H, INFORMIF (H, S, POSSESS (H, MATCH))))
139
+ This is read as: "The hearer believes the speaker wants ..." (this is the intentional part of
140
+ the speaker's speech act) "...
141
+
142
+
143
+
144
+ to perform a yes-no question regarding the hearer's possession
145
+ of a match." Using the action-effect (an effect of the S-REQUEST is that HW(HDo action))
146
+ and want-action rules, sally can infer:
147
+ HBSW (INFORMIF (H, S, POSSESS (H, MATCH)))
148
+ That is, the hearer believes that the speaker wants the hearer to inform the speaker whether
149
+
150
+ 1
151
+
152
+ Action-Effect and Want-Action are from Allen & Perrault (1980).
153
+
154
+ 2
155
+
156
+ The Appendix lists the rules using sally's actual representation.
157
+
158
+ 3
159
+
160
+ In this discussion, we use Allen and Perrault's notation because it is more concise than the equivalent
161
+ representation used in the computer implementation.
162
+
163
+ 4
164
+
165
+ or not the hearer possesses the match. An effect of INFORMIF is KNOWIF; using the
166
+ action-effect rule again sally can infer:
167
+ HBSW (KNOWIF (S, POSSESS (H, MATCH)))
168
+ From here, sally can use the know-positive rule and infer:
169
+ HBSW (POSSESS (H, MATCH))
170
+ Since possessing a match is a precondition to giving it away, sally can use the precondition-action
171
+ rule to infer:
172
+ HBSW (ATRANS (H, S, MATCH))
173
+ This inference process interprets the surface speech act of asking about possession of a match
174
+ as an indirect speech act of requesting the hearer to give a match to the speaker. For a request
175
+ speech act to have the desired effect, it is necessary that the hearer want to comply
176
+ with the request. sally has a rule stating that someone will give someone else an object
177
+ if it is of little value and there is an amicable relationship between the two people. Using
178
+ a small data-base of plan-based rules, sally constructs an inference chain of length seven
179
+ to infer the speaker's intent in this question. Explanation-based learning techniques can be
180
+ used to "compile" this inference process.
181
+ The effect of explanation-based learning on this example is to create a knowledge-intensive
182
+ rule which avoids many of the intermediate steps of the plan-based inference. The knowledge-intensive
183
+ rule has the same conclusion as the longer inference process. The preconditions
184
+ on this rule are exactly those features of the surface speech act and the situation which
185
+ were tested during the inference process to establish the conclusion. In this case, these
186
+ preconditions are that the object of the inquiry be of little value, and that the relationship
187
+ between the speaker and the hearer be an amicable one. Figure 2 illustrates the result of
188
+ the explanation-based learning process on this example.
189
+ Once sally has acquired the rule in Figure 2, the interpretation of similar queries is more
190
+ direct. For example, to interpret the question "Do you have some gum?" requires an
191
+ inference chain of length two. The constraints that are derived during the explanation-based
192
+ learning process do not allow utterances such as "Do you have a BMW?" to be interpreted
193
+ as requests.
194
+
195
+ Current status
196
+
197
+ sally is implemented in Common Lisp. It does not currently contain a parser. The input
198
+ to sally is a representation of the surface speech act of an utterance; the output is an
199
+ identification of the intended speech act (e.g., REQUEST, INFORM, etc.). Using a similar
200
+ line of reasoning to the above example, we have been able to reconstruct some of qualm's
201
+
202
+ 5
203
+ (!- (INTERPRETATION (S-REQUEST (SPEAKER ?S)
204
+ (HEARER ?H)
205
+ (ACT (TYPE INFORMIF)
206
+ (ACTOR ?h)
207
+ (TO ?S)
208
+ (STATE (TYPE POSSESS)
209
+ (OBJECT (P-OBJ (TYPE ?T)
210
+ (OWNER ?H)
211
+ (LOC ?L)
212
+ (VALUE CHEAP)))
213
+ (ACTOR ?H))))
214
+ (REQUEST (SPEAKER ?H)
215
+ (HEARER ?S)
216
+ (ACT (TYPE ATRANS)
217
+ (ACTOR ?H)
218
+ (OBJECT (P-OBJ (TYPE ?T)
219
+ (OWNER ?H)
220
+ (LOC ?L)
221
+ (VALUE CHEAP)))
222
+ (TO ?S)
223
+ (FROM ?H))))
224
+ (RELATIONSHIP ?H ?S AMICABLE))
225
+
226
+ Figure 2. A knowledge-intensive indirect speech act interpretation rule.
227
+
228
+ 3
229
+ knowledge-intensive heuristics from a minimal set of interpretation rules and a library of
230
+ plans. For example, we can generate the Function-Request Conversion rule. Consider: "Do
231
+ you have your car here?" The surface speech act is a verification request regarding the
232
+ location of the hearer's car. An indirect interpretation could be "Would you drive your
233
+ car?" Recognizing that possession of some object in the immediate vicinity is a precondition
234
+ to using that object, we can infer the desired indirect interpretation.
235
+ One limitation of sally is that it is limited to utterances that can be addressed in the
236
+ plan-based approach to interpreting indirect speech acts. For example, there is no difference
237
+ in the literal meanings of "Can you pass the salt?" and "Are you able to pass the salt?"
238
+ However, the indirect interpretation is acceptable of the former but not the latter. Plan-based
239
+ approaches such as ours ignore the linguistic information associated with the literal
240
+ meaning of the utterance (Hinkleman, 1988). One solution to this problem, which is more
241
+ faithful to the psycholinguistic data, is to have rules that map phrases to interpretations
242
+ without having an intermediate representation of the literal meaning.
243
+
244
+ Conclusion
245
+
246
+ We have proposed a hybrid approach to the interpretation of indirect speech acts that combines
247
+ the best features of knowledge-intensive and plan-based approaches. In particular, the
248
+ intended meaning of common types of indirect speech acts are found rapidly by knowledge-intensive
249
+ rules that directly map the surface speech act of an utterance to the intended
250
+
251
+ 4
252
+
253
+ Questions of the form "Do you possess an inexpensive object?" are interpreted as a request for the hearer
254
+ to give the speaker the object if the speaker and the hearer have an amicable relationship.
255
+
256
+ 6
257
+
258
+ meaning. However, it is not necessary to hand-code and maintain a large set of interpretation
259
+ rules. The knowledge-intensive rules are acquired by using explanation-based learning
260
+ after the interpretation of a novel utterance is found by a general, but inefficient search
261
+ process.
262
+
263
+ Acknowledgements
264
+
265
+ We would like to thank Ray Mooney for discussions on EGGS and Elizabeth Hinkleman for
266
+ discussions on interpreting indirect speech acts.
267
+
268
+ References
269
+
270
+ Allen, J. & Perrault, C. (1980). Analyzing intention in utterances. Artificial Intelligence,
271
+ 15, 143-178.
272
+ Austin, J. (1962). How to do things with words. Cambridge, MA: Harvard University Press.
273
+ Cohen, P. & Perrault, C. (1979). Elements of a plan-based theory of speech acts. Cognitive
274
+ Science, 3, 177-212.
275
+ DeJong, G. & Mooney, R. (1986). Explanation-based learning: An alternate view. Machine
276
+ Learning, 1, 145-176.
277
+ Fikes, R. & Nilsson, N. (1971). STRIPS: A new approach to the application of theorem
278
+ proving to problem solving. Artificial Intelligence, 2, 189-208.
279
+ Gibbs, R. (1983). Do people always process the literal meaning of indirect requests? Journal
280
+ of Experimental Psychology: Learning, Memory, and Cognition, 3, 524-533.
281
+ Gibbs, R. (1984). Literal meaning and psychological theory. Cognitive Science, 8, 275-305.
282
+ Grosz, B. & Sidner, C. (1986). Attention, intentions and the structure of discourse. American
283
+ Journal of Computational Linguistics, 12, 175-204.
284
+ Hinkleman, E. & Allen, J. (1988). How to do things with words, computationally speaking
285
+
286
+ (Technical Report). Rochester, NY: Computer Science Department, University of Rochester.
287
+ Keller, R. (1987). Defining operationality for explanation-based learning. Proceedings of the
288
+ National Conference on Artificial Intelligence (482-487). Seattle, WA: Morgan-Kaufmann.
289
+ Lehnert, W. (1978). The process of question answering. Hillsdale, NJ: Lawrence Erlbaum
290
+ Associates.
291
+ Litman, D. & Allen, J. (1987). A plan recognition model for subdialogues in conversation.
292
+
293
+ Cognitive Science, 11, 163-200.
294
+
295
+ 7
296
+
297
+ Mitchell, T., Kedar-Cabelli, S., & Keller, R. (1986). Explanation-based learning: A unifying
298
+ view. Machine Learning, 1, 47-80.
299
+ Mooney, R. & Bennett, S. (1986). A domain independent explanation-based generalizer.
300
+
301
+ Proceedings of the Fifth National Conference on Artificial Intelligence (551-555). Philadelphia:
302
+ Morgan Kaufmann.
303
+ Pazzani, M. (1987). Explanation-based learning for knowledge-based systems. International
304
+ Journal of Man-Machine Studies, 26, 413-433.
305
+ Perrault, C. & Allen, J. (1980). A plan-based analysis of indirect speech acts. American
306
+ Journal of Computational Linguistics, 6, 167-182.
307
+ Searle, J. R. (1975). Indirect speech acts. In P. Cole & J. L. Morgan (Eds.), Syntax and
308
+ Semantics, Vol. 3, Speech Acts. New York: Academic Press.
309
+ Wilensky, R. (1983). Planning and understanding. Reading, MA: Addison-Wesley.
310
+
311
+ 8
312
+
313
+ Appendix
314
+ ; indirect interpretation
315
+
316
+ (!- (interpretation (s-request (speaker ?s)
317
+ (hearer ?l)
318
+ ?a1)
319
+ (request (speaker ?l)
320
+ (hearer ?s)
321
+ ?a2)))
322
+ ; "want precondition"
323
+
324
+ (want ?s (request (speaker ?s)
325
+ (hearer ?l)
326
+ ?a1)
327
+ ?a2)
328
+ (want-perform ?l ?s ?a2))
329
+ ; "action effect" rule
330
+
331
+ (!- (want ?a ?act ?res)
332
+ (effect ?act ?e)
333
+ (want ?a ?e ?res))
334
+ ; "want-action" rule
335
+
336
+ (!- (want ?a (want ?s ?act ?res1) ?res2)
337
+ (want ?a ?act ?res2))
338
+ ; "know positive" rule
339
+
340
+ (!- (want ?a (knowif ?a ?p) ?res)
341
+ (want ?a ?p ?res))
342
+ ; "precondition-action" rule
343
+
344
+ (!- (want ?a ?p ?res)
345
+ (precondition ?p ?res))
346
+ ; an "amicable" relationship is required for ATRANSing cheap objects
347
+
348
+ (!- (want-perform ?actor ?for (act (type atrans)
349
+ (actor ?actor)
350
+ (object (p-obj (type ?x)
351
+ (owner ?actor)
352
+ (loc ?loc)
353
+ (value cheap)))
354
+ (to ?for)
355
+ (from ?actor)))
356
+ (relationship ?actor ?for amicable))
357
+
358
+ 9
359
+
360
+ ; precondition for ATRANSing is possession
361
+
362
+ (!- (precondition (state (type possess)
363
+ (object ?o)
364
+ (actor ?a))
365
+ (act (type atrans)
366
+ (actor ?a)
367
+ (object ?o)
368
+ (to ?to)
369
+ (from ?a))))
370
+
371
+ ; precondition for using is possession
372
+
373
+ (!- (precondition (state (type possess)
374
+ (object ?o)
375
+ (actor ?l))
376
+ (plan (type use)
377
+ (actor ?a)
378
+ (object ?o))))
379
+
380
+ ; effect of request is want
381
+
382
+ (!- (effect (request (speaker ?a)
383
+ (hearer ?s)
384
+ ?act)
385
+ (want ?s ?act ?res)))
386
+
387
+ ; effect of informif is knowif
388
+
389
+ (!- (effect (act (type informif)
390
+ (actor ?a)
391
+ (to ?s)
392
+ ?p)
393
+ (knowif ?s ?p)))
394
+
data/spec/spec_helper.rb CHANGED
@@ -1,3 +1,6 @@
1
1
  # encoding: UTF-8
2
2
 
3
3
  require 'biblicit'
4
+ require 'pry'
5
+
6
+ FIXTURES_DIR = "#{File.dirname(__FILE__)}/fixtures/"
@@ -26,14 +26,8 @@ use vars qw(%dictH %nameH %monthH %affiH %addrH %conjH %prepH %postcodeH %cityH
26
26
 
27
27
  sub AddrMatch() {
28
28
 
29
- ###open (MYLOGGER, ">ADDRMATCH.LOG");
30
- ###MYLOGGER->autoflush(1);
31
-
32
29
  my $inline = shift;
33
30
 
34
- ###$inline="Solitary Waves in the Critical Surface Tension Model";
35
- ###print MYLOGGER "$inline\n";
36
-
37
31
  my @words = split(/\s+/, $inline);
38
32
  my $senLen = 0;
39
33
 
@@ -43,27 +37,15 @@ sub AddrMatch() {
43
37
  $senLen ++; # punctuation
44
38
  }
45
39
 
46
- ###foreach $word (@words){
47
- ###print MYLOGGER "before : word is \"$word\"\n";
48
- ###$word = lc($word);
49
- ###print MYLOGGER "after : word is \"$word\"\n";
50
- ###}
51
-
52
-
53
- ###print MYLOGGER "count is $#words\n";
54
-
55
40
  for my $i(1 .. $#words) {
56
- ### print MYLOGGER "word is $words[$i]\n";
57
41
  if ($words[$i] !~ /^\W+\s*$/) {
58
42
  $senLen ++; # punctuation
59
43
  }
60
44
  #the first letter is capitalized
61
45
  if (($words[$i-1] =~ /^[\p{IsUpper}]/) && ($words[$i] =~ /^[\p{IsUpper}]/)) {
62
- ###print MYLOGGER "before: $words[$i-1],$words[$i]\n";
63
46
  my $pre = lc($words[$i-1]);
64
47
 
65
48
  my $now = lc($words[$i]);
66
- ###print MYLOGGER "pre is $pre\n now is $now\n";
67
49
  if (exists $stateH{"$pre $now"}) { # need to check if it is correct
68
50
  $words[$i-1] = "";
69
51
  $words[$i] = ":state:";
@@ -76,8 +58,8 @@ sub AddrMatch() {
76
58
  }
77
59
  }
78
60
  }
79
- ###CLOSE(MYLOGGER);
80
- #Broken line is because of the insufficient hard disk
61
+
62
+ #Broken line is because of the insufficient hard disk
81
63
  $inline = "@words"; #nice join!
82
64
  $inline =~ s/^\s+//g;
83
65
  $inline =~ s/\s+$//g;
@@ -24,13 +24,6 @@ use HeaderParse::API::Function qw(&AddrMatch &printDict &GenTrainVecMatrix &Line
24
24
  use HeaderParse::Config::API_Config qw($offlineD $Classifier $Tmp_Dir);
25
25
  #return chunk array, but it could contain only text, not real @; needs further processing
26
26
 
27
- #my $offlineD = "../../offline/";
28
- #my $tmpCacheVec = "$offlineD"."tmpVec";
29
- #my $SVMTmpResult = "$offlineD"."tmpresult";
30
-
31
- #my $tmpCacheVec = "$Tmp_Dir/tmpVec";
32
- #my $SVMTmpResult = "$Tmp_Dir/tmpresult";
33
-
34
27
  sub LocateEmailFromComponent() {
35
28
  my $component = shift;
36
29
  my %FindH = ();
@@ -27,8 +27,6 @@ sub _parseHeader{
27
27
  my $status = 1;
28
28
  my $msg = "";
29
29
 
30
- # my $file = "$repositoryLocation/$fileID";
31
- # print "file: $file\n";
32
30
  my $file = $fileID;
33
31
 
34
32
  if (! -e $file) {
@@ -40,11 +40,6 @@ my $TotalHea = 0;
40
40
 
41
41
  my $timestamp;
42
42
 
43
- #my $offlineD = "../../offline/";
44
- #my $TestOutF = "$TestF"."\.parsed";
45
- #my $tmpCacheVecB = "$Tmp_Dir/tmpVec";
46
- #my $SVMTmpResultB = "$Tmp_Dir/tmpresult";
47
-
48
43
  my $FeatureDict = "$offlineD"."WrapperBaseFeaDict";
49
44
  my $ContextFeatureDict = "$offlineD"."WrapperContextFeaDict";
50
45
  my $SpaceAuthorFeatureDictF = "$offlineD"."WrapperSpaceAuthorFeaDict";
@@ -89,8 +84,6 @@ sub Parse{
89
84
  my $header=shift;
90
85
  $timestamp = shift;
91
86
  my $success = 0;
92
- # $tmpCacheVec = $tmpCacheVec . "\_$timestamp\_";
93
- # $SVMTmpResult = $SVMTmpResult . "\_$timestamp\_";
94
87
  my $tmpCacheVec = "$Tmp_Dir/tmpVec"."\_$timestamp\_";
95
88
 
96
89
  my $SVMTmpResult = "$Tmp_Dir/tmpresult"."\_$timestamp\_";
@@ -25,7 +25,12 @@ use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
25
25
 
26
26
  $HeaderParseHome = "$FindBin::Bin/HeaderParseService";
27
27
 
28
- $Classifier = "svm_classify5"; # assumes installed to system path
28
# Pick the SVM-light classifier binary. When the SVM_LIGHT_HOME environment
# variable is set to a non-empty value, use the svm_classify executable in
# that directory; otherwise fall back to "svm_classify5", which must then be
# reachable through the system PATH.
#
# The test is spelled out with defined/length because the expression
# $ENV{'SVM_LIGHT_HOME'}.length is parsed by Perl as a string concatenation
# with length($_), which makes the outcome depend on whatever $_ holds.
if (defined $ENV{'SVM_LIGHT_HOME'} && length $ENV{'SVM_LIGHT_HOME'}) {
    $Classifier = "$ENV{'SVM_LIGHT_HOME'}/svm_classify";
}
else {
    $Classifier = "svm_classify5"; # assume on path
}
29
34
 
30
35
  $Database_Dir = "$HeaderParseHome/resources/database/";
31
36
  $Data_Dir = "$HeaderParseHome/resources/data/";
@@ -0,0 +1,4 @@
1
+ # Ignore everything in this directory
2
+ *
3
+ # Except this file
4
+ !.gitignore
@@ -0,0 +1,75 @@
1
+ #!/usr/bin/perl -CSD
2
+ use strict;
3
+ use FindBin;
4
+
5
+ use lib "$FindBin::Bin/HeaderParseService/lib";
6
+
7
+ use HeaderParse::API::Parser;
8
+ use HeaderParse::Config::API_Config;
9
+
10
# Command-line entry point: expects exactly two arguments, the path of the
# input text file and the path of the output directory.
unless (@ARGV == 2) {
    print "Usage: ./extract.pl path_to_input path_to_output\n";
    exit 1;
}

my ($inputPath, $outputPath) = @ARGV;

import($inputPath, $outputPath);

exit;
23
+
24
# Create the output directory and run header extraction for one document.
#
# Arguments:
#   $filePath - path of the input text file
#   $id       - output directory; also used as the label in status messages
#
# Prints "$id" to STDOUT on success (status 1) and "$id: $msg" to STDERR on
# failure. prep() signals failure with any status <= 0, so negative statuses
# are reported as well instead of being dropped silently.
sub import {
    my ($filePath, $id) = @_;

    system("mkdir", "-p", "$id");

    my ($status, $msg) = prep($filePath, $id);
    if ($status <= 0) {
        print STDERR "$id: $msg\n";
    }
    elsif ($status == 1) {
        print STDOUT "$id\n";
    }
}
37
+
38
+
39
# Run the preparation steps for one document; at present that is header
# extraction only.
#
# Returns (1, "") on success, or the (status, message) pair handed back by
# extractHeader() when its status is zero or negative.
sub prep {
    my ($source, $target) = @_;

    my ($code, $detail) = extractHeader($source, $target);
    return ($code, $detail) if $code <= 0;

    return (1, "");
}
49
+
50
# Parse the header of a text file and write the resulting XML to
# "$id/out.header".
#
# Arguments:
#   $textFile - path of the text file to parse
#   $id       - output directory that receives out.header
#
# Returns (1, "") on success. On failure returns the non-positive status and
# message from HeaderParse::API::Parser::_parseHeader, or (0, $message) when
# the header file cannot be opened or written.
sub extractHeader {
    my ($textFile, $id) = @_;

    # Draw random job IDs until one does not name an existing file under
    # $offlineD.
    my $jobID;
    while ($jobID = rand(time)) {
        last unless -f $offlineD."$jobID";
    }

    my ($status, $msg, $rXML) =
        HeaderParse::API::Parser::_parseHeader($textFile, $jobID);

    if ($status <= 0) {
        return ($status, $msg);
    }

    # Write into the directory passed in as $id rather than reaching for the
    # file-level $outputPath, so the sub depends only on its arguments.
    open(my $head, ">:utf8", "$id/out.header")
        or return (0, "Unable to open header file: $!");

    # Buffered write errors may only surface at close, so check both calls.
    print {$head} $$rXML
        or return (0, "Unable to write header file: $!");
    close($head)
        or return (0, "Unable to write header file: $!");

    return (1, "");
}