NLPPlus 2.0.22__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (465) hide show
  1. NLPPlus/__init__.py +414 -0
  2. NLPPlus/_version.py +24 -0
  3. NLPPlus/analyzers/.github/workflows/dispatch-update-package-analyzers.yml +40 -0
  4. NLPPlus/analyzers/.github/workflows/tag-on-push.yml +86 -0
  5. NLPPlus/analyzers/.github/workflows/update-parse-en-us.yml +130 -0
  6. NLPPlus/analyzers/.gitignore +5 -0
  7. NLPPlus/analyzers/.gitmodules +3 -0
  8. NLPPlus/analyzers/README.md +90 -0
  9. NLPPlus/analyzers/address-parser/README.md +146 -0
  10. NLPPlus/analyzers/address-parser/input/address-texts/01-contact.html +12 -0
  11. NLPPlus/analyzers/address-parser/input/address-texts/02-store-locator.html +14 -0
  12. NLPPlus/analyzers/address-parser/input/address-texts/03-realestate.html +19 -0
  13. NLPPlus/analyzers/address-parser/input/address-texts/04-clinic.html +11 -0
  14. NLPPlus/analyzers/address-parser/input/address-texts/05-university.html +11 -0
  15. NLPPlus/analyzers/address-parser/input/address-texts/06-law-firm.html +11 -0
  16. NLPPlus/analyzers/address-parser/input/address-texts/07-gov.html +11 -0
  17. NLPPlus/analyzers/address-parser/input/address-texts/08-hotel.html +11 -0
  18. NLPPlus/analyzers/address-parser/input/address-texts/09-museum.html +11 -0
  19. NLPPlus/analyzers/address-parser/input/address-texts/10-bank.html +11 -0
  20. NLPPlus/analyzers/address-parser/input/address-texts/11-readme.md +20 -0
  21. NLPPlus/analyzers/address-parser/input/address-texts/12-directory.md +21 -0
  22. NLPPlus/analyzers/address-parser/input/address-texts/13-event.md +21 -0
  23. NLPPlus/analyzers/address-parser/input/address-texts/14-onboarding.md +24 -0
  24. NLPPlus/analyzers/address-parser/input/address-texts/15-press-release.md +16 -0
  25. NLPPlus/analyzers/address-parser/input/address-texts/16-faq.md +21 -0
  26. NLPPlus/analyzers/address-parser/input/address-texts/17-tutorial.md +27 -0
  27. NLPPlus/analyzers/address-parser/input/address-texts/18-suppliers.md +16 -0
  28. NLPPlus/analyzers/address-parser/input/address-texts/19-shipping-policy.md +22 -0
  29. NLPPlus/analyzers/address-parser/input/address-texts/20-mixed.md +17 -0
  30. NLPPlus/analyzers/address-parser/input/address-texts/21-letter.txt +12 -0
  31. NLPPlus/analyzers/address-parser/input/address-texts/22-address-book.txt +15 -0
  32. NLPPlus/analyzers/address-parser/input/address-texts/23-shipping-label.txt +16 -0
  33. NLPPlus/analyzers/address-parser/input/address-texts/24-classifieds.txt +14 -0
  34. NLPPlus/analyzers/address-parser/input/address-texts/25-invoice.txt +16 -0
  35. NLPPlus/analyzers/address-parser/input/address-texts/26-itinerary.txt +12 -0
  36. NLPPlus/analyzers/address-parser/input/address-texts/27-resume.txt +14 -0
  37. NLPPlus/analyzers/address-parser/input/address-texts/28-meeting-minutes.txt +11 -0
  38. NLPPlus/analyzers/address-parser/input/address-texts/29-warranty.txt +14 -0
  39. NLPPlus/analyzers/address-parser/input/address-texts/30-mixed-noise.txt +17 -0
  40. NLPPlus/analyzers/address-parser/input/text.txt +24 -0
  41. NLPPlus/analyzers/address-parser/kb/user/Country.dict +196 -0
  42. NLPPlus/analyzers/address-parser/kb/user/address-synonym.dict +14 -0
  43. NLPPlus/analyzers/address-parser/kb/user/attr.kb +254 -0
  44. NLPPlus/analyzers/address-parser/kb/user/designator.dict +7 -0
  45. NLPPlus/analyzers/address-parser/kb/user/directions.dict +16 -0
  46. NLPPlus/analyzers/address-parser/kb/user/en-usa-states.dict +105 -0
  47. NLPPlus/analyzers/address-parser/kb/user/en-usa-streetsuff.dict +548 -0
  48. NLPPlus/analyzers/address-parser/kb/user/hier.kb +756 -0
  49. NLPPlus/analyzers/address-parser/kb/user/military-address.dict +9 -0
  50. NLPPlus/analyzers/address-parser/kb/user/phr.kb +3 -0
  51. NLPPlus/analyzers/address-parser/kb/user/word.kb +57 -0
  52. NLPPlus/analyzers/address-parser/spec/Changenumbers.nlp +17 -0
  53. NLPPlus/analyzers/address-parser/spec/Grouping.nlp +17 -0
  54. NLPPlus/analyzers/address-parser/spec/KBFuncs.nlp +694 -0
  55. NLPPlus/analyzers/address-parser/spec/Lines.nlp +21 -0
  56. NLPPlus/analyzers/address-parser/spec/PrecedingWords.nlp +40 -0
  57. NLPPlus/analyzers/address-parser/spec/RemovePunct.nlp +15 -0
  58. NLPPlus/analyzers/address-parser/spec/RemoveSpecialChars.nlp +30 -0
  59. NLPPlus/analyzers/address-parser/spec/RemoveWhiteSpace.nlp +20 -0
  60. NLPPlus/analyzers/address-parser/spec/analyzer.seq +18 -0
  61. NLPPlus/analyzers/address-parser/spec/countryname.nlp +18 -0
  62. NLPPlus/analyzers/address-parser/spec/funcs.nlp +141 -0
  63. NLPPlus/analyzers/address-parser/spec/information.nlp +91 -0
  64. NLPPlus/analyzers/address-parser/spec/information1.nlp +88 -0
  65. NLPPlus/analyzers/address-parser/spec/information2.nlp +19 -0
  66. NLPPlus/analyzers/address-parser/spec/information3.nlp +42 -0
  67. NLPPlus/analyzers/address-parser/spec/kbdisp01.nlp +13 -0
  68. NLPPlus/analyzers/address-parser/spec/kbinit.nlp +15 -0
  69. NLPPlus/analyzers/address-parser/spec/kbmake.nlp +17 -0
  70. NLPPlus/analyzers/address-parser/spec/output.nlp +13 -0
  71. NLPPlus/analyzers/address-parser/spec/pincode.nlp +35 -0
  72. NLPPlus/analyzers/address-parser/spec/removelines.nlp +17 -0
  73. NLPPlus/analyzers/address-parser/tmp/README.md +1 -0
  74. NLPPlus/analyzers/emailaddress/README.md +129 -0
  75. NLPPlus/analyzers/emailaddress/input/email_texts/01-acme-contact.html +29 -0
  76. NLPPlus/analyzers/emailaddress/input/email_texts/02-team-roster.html +20 -0
  77. NLPPlus/analyzers/emailaddress/input/email_texts/03-startup-landing.html +23 -0
  78. NLPPlus/analyzers/emailaddress/input/email_texts/04-university-dept.html +21 -0
  79. NLPPlus/analyzers/emailaddress/input/email_texts/05-ecommerce-help.html +17 -0
  80. NLPPlus/analyzers/emailaddress/input/email_texts/06-conference.html +17 -0
  81. NLPPlus/analyzers/emailaddress/input/email_texts/07-blog-post.html +26 -0
  82. NLPPlus/analyzers/emailaddress/input/email_texts/08-gov-services.html +15 -0
  83. NLPPlus/analyzers/emailaddress/input/email_texts/09-forum-thread.html +17 -0
  84. NLPPlus/analyzers/emailaddress/input/email_texts/10-newsletter.html +18 -0
  85. NLPPlus/analyzers/emailaddress/input/email_texts/11-readme.md +20 -0
  86. NLPPlus/analyzers/emailaddress/input/email_texts/12-meeting-notes.md +17 -0
  87. NLPPlus/analyzers/emailaddress/input/email_texts/13-job-listing.md +21 -0
  88. NLPPlus/analyzers/emailaddress/input/email_texts/14-changelog.md +19 -0
  89. NLPPlus/analyzers/emailaddress/input/email_texts/15-faq.md +24 -0
  90. NLPPlus/analyzers/emailaddress/input/email_texts/16-press-release.md +18 -0
  91. NLPPlus/analyzers/emailaddress/input/email_texts/17-tutorial.md +23 -0
  92. NLPPlus/analyzers/emailaddress/input/email_texts/18-product-docs.md +25 -0
  93. NLPPlus/analyzers/emailaddress/input/email_texts/19-community-guidelines.md +23 -0
  94. NLPPlus/analyzers/emailaddress/input/email_texts/20-recipe-blog.md +17 -0
  95. NLPPlus/analyzers/emailaddress/input/email_texts/21-email-signature.txt +9 -0
  96. NLPPlus/analyzers/emailaddress/input/email_texts/22-contact-list.txt +20 -0
  97. NLPPlus/analyzers/emailaddress/input/email_texts/23-support-ticket.txt +17 -0
  98. NLPPlus/analyzers/emailaddress/input/email_texts/24-classifieds.txt +15 -0
  99. NLPPlus/analyzers/emailaddress/input/email_texts/25-meeting-minutes.txt +17 -0
  100. NLPPlus/analyzers/emailaddress/input/email_texts/26-product-reviews.txt +16 -0
  101. NLPPlus/analyzers/emailaddress/input/email_texts/27-event-invite.txt +16 -0
  102. NLPPlus/analyzers/emailaddress/input/email_texts/28-bug-report.txt +19 -0
  103. NLPPlus/analyzers/emailaddress/input/email_texts/29-travel-itinerary.txt +15 -0
  104. NLPPlus/analyzers/emailaddress/input/email_texts/30-mixed-noise.txt +19 -0
  105. NLPPlus/analyzers/emailaddress/input/email_variations.txt +155 -0
  106. NLPPlus/analyzers/emailaddress/input/text.txt +30 -0
  107. NLPPlus/analyzers/emailaddress/kb/user/attr.kb +254 -0
  108. NLPPlus/analyzers/emailaddress/kb/user/charactders.dict +39 -0
  109. NLPPlus/analyzers/emailaddress/kb/user/country.dict +265 -0
  110. NLPPlus/analyzers/emailaddress/kb/user/domain.dict +1134 -0
  111. NLPPlus/analyzers/emailaddress/kb/user/hier.kb +756 -0
  112. NLPPlus/analyzers/emailaddress/kb/user/phr.kb +3 -0
  113. NLPPlus/analyzers/emailaddress/kb/user/word.kb +57 -0
  114. NLPPlus/analyzers/emailaddress/spec/EmailZone.nlp +31 -0
  115. NLPPlus/analyzers/emailaddress/spec/EmailZoneAt.nlp +26 -0
  116. NLPPlus/analyzers/emailaddress/spec/KBFuncs.nlp +694 -0
  117. NLPPlus/analyzers/emailaddress/spec/Lines.nlp +21 -0
  118. NLPPlus/analyzers/emailaddress/spec/analyzer.seq +20 -0
  119. NLPPlus/analyzers/emailaddress/spec/comment.nlp +18 -0
  120. NLPPlus/analyzers/emailaddress/spec/commentRemove.nlp +17 -0
  121. NLPPlus/analyzers/emailaddress/spec/email0.nlp +109 -0
  122. NLPPlus/analyzers/emailaddress/spec/email0at.nlp +134 -0
  123. NLPPlus/analyzers/emailaddress/spec/email0z.nlp +18 -0
  124. NLPPlus/analyzers/emailaddress/spec/email1.nlp +146 -0
  125. NLPPlus/analyzers/emailaddress/spec/email11.nlp +62 -0
  126. NLPPlus/analyzers/emailaddress/spec/email12.nlp +17 -0
  127. NLPPlus/analyzers/emailaddress/spec/email2.nlp +17 -0
  128. NLPPlus/analyzers/emailaddress/spec/email3.nlp +20 -0
  129. NLPPlus/analyzers/emailaddress/spec/email4.nlp +20 -0
  130. NLPPlus/analyzers/emailaddress/spec/email5.nlp +19 -0
  131. NLPPlus/analyzers/emailaddress/spec/email6.nlp +31 -0
  132. NLPPlus/analyzers/emailaddress/spec/email7.nlp +20 -0
  133. NLPPlus/analyzers/emailaddress/spec/emailChars1.nlp +16 -0
  134. NLPPlus/analyzers/emailaddress/spec/funcs.nlp +143 -0
  135. NLPPlus/analyzers/emailaddress/spec/kbdisp01.nlp +13 -0
  136. NLPPlus/analyzers/emailaddress/spec/kbinit.nlp +14 -0
  137. NLPPlus/analyzers/emailaddress/spec/output.nlp +13 -0
  138. NLPPlus/analyzers/emailaddress/tmp/README.md +1 -0
  139. NLPPlus/analyzers/links/README.md +138 -0
  140. NLPPlus/analyzers/links/input/links-texts/01-resources.html +18 -0
  141. NLPPlus/analyzers/links/input/links-texts/02-news-portal.html +19 -0
  142. NLPPlus/analyzers/links/input/links-texts/03-store.html +14 -0
  143. NLPPlus/analyzers/links/input/links-texts/04-docs.html +16 -0
  144. NLPPlus/analyzers/links/input/links-texts/05-university.html +15 -0
  145. NLPPlus/analyzers/links/input/links-texts/06-portfolio.html +16 -0
  146. NLPPlus/analyzers/links/input/links-texts/07-gov.html +14 -0
  147. NLPPlus/analyzers/links/input/links-texts/08-recipes.html +15 -0
  148. NLPPlus/analyzers/links/input/links-texts/09-conference.html +14 -0
  149. NLPPlus/analyzers/links/input/links-texts/10-help-center.html +17 -0
  150. NLPPlus/analyzers/links/input/links-texts/11-readme.md +22 -0
  151. NLPPlus/analyzers/links/input/links-texts/12-link-roundup.md +20 -0
  152. NLPPlus/analyzers/links/input/links-texts/13-contributing.md +23 -0
  153. NLPPlus/analyzers/links/input/links-texts/14-changelog.md +19 -0
  154. NLPPlus/analyzers/links/input/links-texts/15-awesome-list.md +24 -0
  155. NLPPlus/analyzers/links/input/links-texts/16-tutorial.md +24 -0
  156. NLPPlus/analyzers/links/input/links-texts/17-bookmarks.md +24 -0
  157. NLPPlus/analyzers/links/input/links-texts/18-press-release.md +20 -0
  158. NLPPlus/analyzers/links/input/links-texts/19-security-advisory.md +23 -0
  159. NLPPlus/analyzers/links/input/links-texts/20-link-dump.md +20 -0
  160. NLPPlus/analyzers/links/input/links-texts/21-signature.txt +9 -0
  161. NLPPlus/analyzers/links/input/links-texts/22-bookmark-export.txt +15 -0
  162. NLPPlus/analyzers/links/input/links-texts/23-support-ticket.txt +16 -0
  163. NLPPlus/analyzers/links/input/links-texts/24-classifieds.txt +15 -0
  164. NLPPlus/analyzers/links/input/links-texts/25-meeting-notes.txt +16 -0
  165. NLPPlus/analyzers/links/input/links-texts/26-reviews.txt +15 -0
  166. NLPPlus/analyzers/links/input/links-texts/27-itinerary.txt +14 -0
  167. NLPPlus/analyzers/links/input/links-texts/28-bug-report.txt +18 -0
  168. NLPPlus/analyzers/links/input/links-texts/29-syllabus.txt +15 -0
  169. NLPPlus/analyzers/links/input/links-texts/30-mixed-noise.txt +19 -0
  170. NLPPlus/analyzers/links/input/text.txt +19 -0
  171. NLPPlus/analyzers/links/kb/user/attr.kb +254 -0
  172. NLPPlus/analyzers/links/kb/user/country.dict +264 -0
  173. NLPPlus/analyzers/links/kb/user/domain.dict +1134 -0
  174. NLPPlus/analyzers/links/kb/user/hier.kb +756 -0
  175. NLPPlus/analyzers/links/kb/user/phr.kb +3 -0
  176. NLPPlus/analyzers/links/kb/user/schemelist.dict +39 -0
  177. NLPPlus/analyzers/links/kb/user/word.kb +57 -0
  178. NLPPlus/analyzers/links/spec/KBFuncs.nlp +694 -0
  179. NLPPlus/analyzers/links/spec/Lines.nlp +21 -0
  180. NLPPlus/analyzers/links/spec/Link1.nlp +17 -0
  181. NLPPlus/analyzers/links/spec/Link2.nlp +27 -0
  182. NLPPlus/analyzers/links/spec/Link3.nlp +16 -0
  183. NLPPlus/analyzers/links/spec/Link4.nlp +32 -0
  184. NLPPlus/analyzers/links/spec/Link5.nlp +115 -0
  185. NLPPlus/analyzers/links/spec/Link6.nlp +76 -0
  186. NLPPlus/analyzers/links/spec/Link7.nlp +24 -0
  187. NLPPlus/analyzers/links/spec/Link8.nlp +12 -0
  188. NLPPlus/analyzers/links/spec/LinkZone.nlp +46 -0
  189. NLPPlus/analyzers/links/spec/Links.nlp +52 -0
  190. NLPPlus/analyzers/links/spec/Links3.nlp +21 -0
  191. NLPPlus/analyzers/links/spec/Links41.nlp +30 -0
  192. NLPPlus/analyzers/links/spec/RemoveWhiteSpace.nlp +20 -0
  193. NLPPlus/analyzers/links/spec/analyzer.seq +16 -0
  194. NLPPlus/analyzers/links/spec/funcs.nlp +88 -0
  195. NLPPlus/analyzers/links/spec/kbDisplay.nlp +13 -0
  196. NLPPlus/analyzers/links/spec/kbdisp00.nlp +11 -0
  197. NLPPlus/analyzers/links/spec/kbinit.nlp +13 -0
  198. NLPPlus/analyzers/links/spec/links1.nlp +14 -0
  199. NLPPlus/analyzers/links/spec/linksphno.nlp +34 -0
  200. NLPPlus/analyzers/links/spec/output.nlp +13 -0
  201. NLPPlus/analyzers/links/spec/removeblankline.nlp +15 -0
  202. NLPPlus/analyzers/links/tmp/README.md +1 -0
  203. NLPPlus/analyzers/parse-en-us/.github/workflows/dispatch-update-parse-en-us.yml +37 -0
  204. NLPPlus/analyzers/parse-en-us/LICENSE +21 -0
  205. NLPPlus/analyzers/parse-en-us/README.md +15 -0
  206. NLPPlus/analyzers/parse-en-us/input/business.txt +37 -0
  207. NLPPlus/analyzers/parse-en-us/input/doj.txt +7 -0
  208. NLPPlus/analyzers/parse-en-us/input/test.txt +1 -0
  209. NLPPlus/analyzers/parse-en-us/kb/user/all.dict +191543 -0
  210. NLPPlus/analyzers/parse-en-us/kb/user/attr.kb +254 -0
  211. NLPPlus/analyzers/parse-en-us/kb/user/hier.kb +774 -0
  212. NLPPlus/analyzers/parse-en-us/kb/user/phr.kb +3 -0
  213. NLPPlus/analyzers/parse-en-us/kb/user/word.kb +39 -0
  214. NLPPlus/analyzers/parse-en-us/spec/CYCLE_0.nlp +14 -0
  215. NLPPlus/analyzers/parse-en-us/spec/CYCLE_1.nlp +12 -0
  216. NLPPlus/analyzers/parse-en-us/spec/CYCLE_2.nlp +14 -0
  217. NLPPlus/analyzers/parse-en-us/spec/KBFuncs.nlp +304 -0
  218. NLPPlus/analyzers/parse-en-us/spec/LINES_nosp.nlp +30 -0
  219. NLPPlus/analyzers/parse-en-us/spec/Lines.nlp +21 -0
  220. NLPPlus/analyzers/parse-en-us/spec/analyzer.seq +136 -0
  221. NLPPlus/analyzers/parse-en-us/spec/anaphora100.nlp +160 -0
  222. NLPPlus/analyzers/parse-en-us/spec/buff_out.nlp +116 -0
  223. NLPPlus/analyzers/parse-en-us/spec/caps100.nlp +173 -0
  224. NLPPlus/analyzers/parse-en-us/spec/caps50.nlp +654 -0
  225. NLPPlus/analyzers/parse-en-us/spec/city.nlp +1247 -0
  226. NLPPlus/analyzers/parse-en-us/spec/clause100.nlp +32 -0
  227. NLPPlus/analyzers/parse-en-us/spec/clause200.nlp +24 -0
  228. NLPPlus/analyzers/parse-en-us/spec/clause300.nlp +174 -0
  229. NLPPlus/analyzers/parse-en-us/spec/clause400.nlp +105 -0
  230. NLPPlus/analyzers/parse-en-us/spec/clause_out.nlp +36 -0
  231. NLPPlus/analyzers/parse-en-us/spec/clause_pos.nlp +100 -0
  232. NLPPlus/analyzers/parse-en-us/spec/clausesem.nlp +28 -0
  233. NLPPlus/analyzers/parse-en-us/spec/common.nlp +2905 -0
  234. NLPPlus/analyzers/parse-en-us/spec/country.nlp +79 -0
  235. NLPPlus/analyzers/parse-en-us/spec/date.nlp +269 -0
  236. NLPPlus/analyzers/parse-en-us/spec/dictfix.nlp +772 -0
  237. NLPPlus/analyzers/parse-en-us/spec/displayKB.nlp +13 -0
  238. NLPPlus/analyzers/parse-en-us/spec/doc.nlp +151 -0
  239. NLPPlus/analyzers/parse-en-us/spec/dom100.nlp +154 -0
  240. NLPPlus/analyzers/parse-en-us/spec/dom150.nlp +243 -0
  241. NLPPlus/analyzers/parse-en-us/spec/dom175.nlp +592 -0
  242. NLPPlus/analyzers/parse-en-us/spec/dom200.nlp +2810 -0
  243. NLPPlus/analyzers/parse-en-us/spec/domfuns.nlp +170 -0
  244. NLPPlus/analyzers/parse-en-us/spec/dqan100.nlp +442 -0
  245. NLPPlus/analyzers/parse-en-us/spec/dqan200.nlp +181 -0
  246. NLPPlus/analyzers/parse-en-us/spec/entity100.nlp +314 -0
  247. NLPPlus/analyzers/parse-en-us/spec/feat.nlp +37 -0
  248. NLPPlus/analyzers/parse-en-us/spec/feat100.nlp +248 -0
  249. NLPPlus/analyzers/parse-en-us/spec/fin.nlp +59 -0
  250. NLPPlus/analyzers/parse-en-us/spec/fnretok.nlp +187 -0
  251. NLPPlus/analyzers/parse-en-us/spec/fnword.nlp +1411 -0
  252. NLPPlus/analyzers/parse-en-us/spec/funs.nlp +5923 -0
  253. NLPPlus/analyzers/parse-en-us/spec/hilite_alpha.nlp +22 -0
  254. NLPPlus/analyzers/parse-en-us/spec/hilite_clause.nlp +42 -0
  255. NLPPlus/analyzers/parse-en-us/spec/hilite_ne.nlp +30 -0
  256. NLPPlus/analyzers/parse-en-us/spec/hilite_np.nlp +27 -0
  257. NLPPlus/analyzers/parse-en-us/spec/hilite_pos.nlp +24 -0
  258. NLPPlus/analyzers/parse-en-us/spec/hilite_postag.nlp +26 -0
  259. NLPPlus/analyzers/parse-en-us/spec/hilite_spatial.nlp +35 -0
  260. NLPPlus/analyzers/parse-en-us/spec/hilite_temporal.nlp +41 -0
  261. NLPPlus/analyzers/parse-en-us/spec/hilite_undone.nlp +33 -0
  262. NLPPlus/analyzers/parse-en-us/spec/inc100.nlp +917 -0
  263. NLPPlus/analyzers/parse-en-us/spec/inc300.nlp +15 -0
  264. NLPPlus/analyzers/parse-en-us/spec/incap100.nlp +165 -0
  265. NLPPlus/analyzers/parse-en-us/spec/inclausesem.nlp +44 -0
  266. NLPPlus/analyzers/parse-en-us/spec/ini.nlp +139 -0
  267. NLPPlus/analyzers/parse-en-us/spec/jdfasd.nlp +18 -0
  268. NLPPlus/analyzers/parse-en-us/spec/kb_geo.nlp +62 -0
  269. NLPPlus/analyzers/parse-en-us/spec/kb_onto.nlp +35 -0
  270. NLPPlus/analyzers/parse-en-us/spec/kbload.nlp +2597 -0
  271. NLPPlus/analyzers/parse-en-us/spec/lex_unambig.nlp +67 -0
  272. NLPPlus/analyzers/parse-en-us/spec/lex_unambig2.nlp +54 -0
  273. NLPPlus/analyzers/parse-en-us/spec/location100.nlp +179 -0
  274. NLPPlus/analyzers/parse-en-us/spec/lookup_word.nlp +84 -0
  275. NLPPlus/analyzers/parse-en-us/spec/mhbv100.nlp +404 -0
  276. NLPPlus/analyzers/parse-en-us/spec/ne_out.nlp +92 -0
  277. NLPPlus/analyzers/parse-en-us/spec/ne_xml.nlp +85 -0
  278. NLPPlus/analyzers/parse-en-us/spec/num.nlp +662 -0
  279. NLPPlus/analyzers/parse-en-us/spec/oldsemfuns.nlp +1004 -0
  280. NLPPlus/analyzers/parse-en-us/spec/phr100.nlp +420 -0
  281. NLPPlus/analyzers/parse-en-us/spec/phr50.nlp +160 -0
  282. NLPPlus/analyzers/parse-en-us/spec/pos10.nlp +168 -0
  283. NLPPlus/analyzers/parse-en-us/spec/pos100.nlp +466 -0
  284. NLPPlus/analyzers/parse-en-us/spec/pos200.nlp +694 -0
  285. NLPPlus/analyzers/parse-en-us/spec/pos25.nlp +3559 -0
  286. NLPPlus/analyzers/parse-en-us/spec/pos300.nlp +218 -0
  287. NLPPlus/analyzers/parse-en-us/spec/pos400.nlp +236 -0
  288. NLPPlus/analyzers/parse-en-us/spec/pos50.nlp +17226 -0
  289. NLPPlus/analyzers/parse-en-us/spec/pos75.nlp +294 -0
  290. NLPPlus/analyzers/parse-en-us/spec/pos_out.nlp +134 -0
  291. NLPPlus/analyzers/parse-en-us/spec/pos_out_noscore - Copy.nlp +210 -0
  292. NLPPlus/analyzers/parse-en-us/spec/pos_out_noscore.nlp +266 -0
  293. NLPPlus/analyzers/parse-en-us/spec/poserr.nlp +22 -0
  294. NLPPlus/analyzers/parse-en-us/spec/posfuns.nlp +2501 -0
  295. NLPPlus/analyzers/parse-en-us/spec/pre_100.nlp +38 -0
  296. NLPPlus/analyzers/parse-en-us/spec/pre_200.nlp +47 -0
  297. NLPPlus/analyzers/parse-en-us/spec/pre_300.nlp +35 -0
  298. NLPPlus/analyzers/parse-en-us/spec/pre_400.nlp +39 -0
  299. NLPPlus/analyzers/parse-en-us/spec/pre_500.nlp +43 -0
  300. NLPPlus/analyzers/parse-en-us/spec/pre_600.nlp +21 -0
  301. NLPPlus/analyzers/parse-en-us/spec/pre_badeos.nlp +33 -0
  302. NLPPlus/analyzers/parse-en-us/spec/pre_form.nlp +19 -0
  303. NLPPlus/analyzers/parse-en-us/spec/pre_zap.nlp +28 -0
  304. NLPPlus/analyzers/parse-en-us/spec/pre_zap1.nlp +21 -0
  305. NLPPlus/analyzers/parse-en-us/spec/pre_zap2.nlp +29 -0
  306. NLPPlus/analyzers/parse-en-us/spec/punct100.nlp +416 -0
  307. NLPPlus/analyzers/parse-en-us/spec/punct200.nlp +52 -0
  308. NLPPlus/analyzers/parse-en-us/spec/qclause100.nlp +495 -0
  309. NLPPlus/analyzers/parse-en-us/spec/qclause200.nlp +57 -0
  310. NLPPlus/analyzers/parse-en-us/spec/qclause300.nlp +39 -0
  311. NLPPlus/analyzers/parse-en-us/spec/qclause50.nlp +465 -0
  312. NLPPlus/analyzers/parse-en-us/spec/qclause75.nlp +78 -0
  313. NLPPlus/analyzers/parse-en-us/spec/qconj100.nlp +78 -0
  314. NLPPlus/analyzers/parse-en-us/spec/qconj200.nlp +92 -0
  315. NLPPlus/analyzers/parse-en-us/spec/qline100.nlp +49 -0
  316. NLPPlus/analyzers/parse-en-us/spec/qseg100.nlp +134 -0
  317. NLPPlus/analyzers/parse-en-us/spec/qseg125.nlp +40 -0
  318. NLPPlus/analyzers/parse-en-us/spec/qseg150.nlp +115 -0
  319. NLPPlus/analyzers/parse-en-us/spec/qseg200.nlp +126 -0
  320. NLPPlus/analyzers/parse-en-us/spec/qsemfuns.nlp +63 -0
  321. NLPPlus/analyzers/parse-en-us/spec/qsent100.nlp +178 -0
  322. NLPPlus/analyzers/parse-en-us/spec/qsent50.nlp +522 -0
  323. NLPPlus/analyzers/parse-en-us/spec/qsent75.nlp +157 -0
  324. NLPPlus/analyzers/parse-en-us/spec/quotes100.nlp +61 -0
  325. NLPPlus/analyzers/parse-en-us/spec/rule_out.nlp +161 -0
  326. NLPPlus/analyzers/parse-en-us/spec/seg100.nlp +335 -0
  327. NLPPlus/analyzers/parse-en-us/spec/semfuns.nlp +2104 -0
  328. NLPPlus/analyzers/parse-en-us/spec/sent100.nlp +44 -0
  329. NLPPlus/analyzers/parse-en-us/spec/sent125.nlp +23 -0
  330. NLPPlus/analyzers/parse-en-us/spec/sent200.nlp +97 -0
  331. NLPPlus/analyzers/parse-en-us/spec/sent250.nlp +54 -0
  332. NLPPlus/analyzers/parse-en-us/spec/sent300.nlp +32 -0
  333. NLPPlus/analyzers/parse-en-us/spec/sent400.nlp +525 -0
  334. NLPPlus/analyzers/parse-en-us/spec/sent50.nlp +24 -0
  335. NLPPlus/analyzers/parse-en-us/spec/sent_pos.nlp +105 -0
  336. NLPPlus/analyzers/parse-en-us/spec/sentsem.nlp +27 -0
  337. NLPPlus/analyzers/parse-en-us/spec/study100.nlp +54 -0
  338. NLPPlus/analyzers/parse-en-us/spec/tag100.nlp +100 -0
  339. NLPPlus/analyzers/parse-en-us/spec/tag50.nlp +28 -0
  340. NLPPlus/analyzers/parse-en-us/spec/tags100.nlp +60 -0
  341. NLPPlus/analyzers/parse-en-us/spec/textzone100.nlp +40 -0
  342. NLPPlus/analyzers/parse-en-us/spec/textzone50.nlp +24 -0
  343. NLPPlus/analyzers/parse-en-us/spec/tmp.seq +134 -0
  344. NLPPlus/analyzers/parse-en-us/spec/tok10.nlp +86 -0
  345. NLPPlus/analyzers/parse-en-us/spec/tok100.nlp +451 -0
  346. NLPPlus/analyzers/parse-en-us/spec/tok150.nlp +61 -0
  347. NLPPlus/analyzers/parse-en-us/spec/tok175.nlp +637 -0
  348. NLPPlus/analyzers/parse-en-us/spec/tok200.nlp +154 -0
  349. NLPPlus/analyzers/parse-en-us/spec/tok25.nlp +24 -0
  350. NLPPlus/analyzers/parse-en-us/spec/tok50.nlp +645 -0
  351. NLPPlus/analyzers/parse-en-us/spec/trav_dict.nlp +19 -0
  352. NLPPlus/analyzers/parse-en-us/spec/xmlfns.nlp +69 -0
  353. NLPPlus/analyzers/parse-en-us/spec/xmlrecursive.nlp +283 -0
  354. NLPPlus/analyzers/parse-en-us/spec/zap100.nlp +39 -0
  355. NLPPlus/analyzers/parse-en-us/spec/zaplines.nlp +21 -0
  356. NLPPlus/analyzers/parse-en-us/spec/zapwhite.nlp +20 -0
  357. NLPPlus/analyzers/parse-en-us/tmp/README.md +1 -0
  358. NLPPlus/analyzers/telephone/README.md +65 -0
  359. NLPPlus/analyzers/telephone/input/phone_variations.txt +262 -0
  360. NLPPlus/analyzers/telephone/input/telephone-texts/01-acme-contact.html +19 -0
  361. NLPPlus/analyzers/telephone/input/telephone-texts/02-store-locator.html +16 -0
  362. NLPPlus/analyzers/telephone/input/telephone-texts/03-restaurant.html +12 -0
  363. NLPPlus/analyzers/telephone/input/telephone-texts/04-clinic.html +13 -0
  364. NLPPlus/analyzers/telephone/input/telephone-texts/05-realestate.html +19 -0
  365. NLPPlus/analyzers/telephone/input/telephone-texts/06-support.html +16 -0
  366. NLPPlus/analyzers/telephone/input/telephone-texts/07-law-firm.html +13 -0
  367. NLPPlus/analyzers/telephone/input/telephone-texts/08-school.html +13 -0
  368. NLPPlus/analyzers/telephone/input/telephone-texts/09-hotel.html +13 -0
  369. NLPPlus/analyzers/telephone/input/telephone-texts/10-newsroom.html +14 -0
  370. NLPPlus/analyzers/telephone/input/telephone-texts/11-readme.md +20 -0
  371. NLPPlus/analyzers/telephone/input/telephone-texts/12-meeting-notes.md +16 -0
  372. NLPPlus/analyzers/telephone/input/telephone-texts/13-product-docs.md +18 -0
  373. NLPPlus/analyzers/telephone/input/telephone-texts/14-changelog.md +17 -0
  374. NLPPlus/analyzers/telephone/input/telephone-texts/15-faq.md +19 -0
  375. NLPPlus/analyzers/telephone/input/telephone-texts/16-press-release.md +17 -0
  376. NLPPlus/analyzers/telephone/input/telephone-texts/17-tutorial.md +20 -0
  377. NLPPlus/analyzers/telephone/input/telephone-texts/18-directory.md +21 -0
  378. NLPPlus/analyzers/telephone/input/telephone-texts/19-onboarding.md +19 -0
  379. NLPPlus/analyzers/telephone/input/telephone-texts/20-event.md +19 -0
  380. NLPPlus/analyzers/telephone/input/telephone-texts/21-signature.txt +9 -0
  381. NLPPlus/analyzers/telephone/input/telephone-texts/22-phone-list.txt +18 -0
  382. NLPPlus/analyzers/telephone/input/telephone-texts/23-support-ticket.txt +13 -0
  383. NLPPlus/analyzers/telephone/input/telephone-texts/24-classifieds.txt +14 -0
  384. NLPPlus/analyzers/telephone/input/telephone-texts/25-meeting-minutes.txt +13 -0
  385. NLPPlus/analyzers/telephone/input/telephone-texts/26-reviews.txt +14 -0
  386. NLPPlus/analyzers/telephone/input/telephone-texts/27-itinerary.txt +13 -0
  387. NLPPlus/analyzers/telephone/input/telephone-texts/28-bug-report.txt +17 -0
  388. NLPPlus/analyzers/telephone/input/telephone-texts/29-utility.txt +12 -0
  389. NLPPlus/analyzers/telephone/input/telephone-texts/30-mixed-noise.txt +18 -0
  390. NLPPlus/analyzers/telephone/input/text.txt +27 -0
  391. NLPPlus/analyzers/telephone/kb/user/attr.kb +254 -0
  392. NLPPlus/analyzers/telephone/kb/user/hier.kb +756 -0
  393. NLPPlus/analyzers/telephone/kb/user/phr.kb +3 -0
  394. NLPPlus/analyzers/telephone/kb/user/tel-country-codes.kbb +206 -0
  395. NLPPlus/analyzers/telephone/kb/user/word.kb +57 -0
  396. NLPPlus/analyzers/telephone/spec/KBFuncs.nlp +694 -0
  397. NLPPlus/analyzers/telephone/spec/Lines.nlp +21 -0
  398. NLPPlus/analyzers/telephone/spec/Telep0.nlp +69 -0
  399. NLPPlus/analyzers/telephone/spec/Telep1.nlp +180 -0
  400. NLPPlus/analyzers/telephone/spec/Telep1b.nlp +31 -0
  401. NLPPlus/analyzers/telephone/spec/Telep2.nlp +18 -0
  402. NLPPlus/analyzers/telephone/spec/TelepIntl.nlp +45 -0
  403. NLPPlus/analyzers/telephone/spec/TelepIntlCat.nlp +55 -0
  404. NLPPlus/analyzers/telephone/spec/TelephoneZone.nlp +36 -0
  405. NLPPlus/analyzers/telephone/spec/analyzer.seq +13 -0
  406. NLPPlus/analyzers/telephone/spec/funcs.nlp +121 -0
  407. NLPPlus/analyzers/telephone/spec/kbdisp01.nlp +13 -0
  408. NLPPlus/analyzers/telephone/spec/kbinit.nlp +16 -0
  409. NLPPlus/analyzers/telephone/spec/output.nlp +13 -0
  410. NLPPlus/analyzers/telephone/spec/tele1.nlp +15 -0
  411. NLPPlus/analyzers/telephone/tmp/README.md +1 -0
  412. NLPPlus/bindings.cp313-win_amd64.pyd +0 -0
  413. NLPPlus/cloud.py +482 -0
  414. NLPPlus/data/rfb/spec/Copy of decl.nlp +15 -0
  415. NLPPlus/data/rfb/spec/actions.nlp +44 -0
  416. NLPPlus/data/rfb/spec/analyzer.seq +37 -0
  417. NLPPlus/data/rfb/spec/bigtok.nlp +132 -0
  418. NLPPlus/data/rfb/spec/checks.nlp +17 -0
  419. NLPPlus/data/rfb/spec/code.nlp +36 -0
  420. NLPPlus/data/rfb/spec/components.nlp +60 -0
  421. NLPPlus/data/rfb/spec/decl.nlp +36 -0
  422. NLPPlus/data/rfb/spec/decls.nlp +37 -0
  423. NLPPlus/data/rfb/spec/element.nlp +19 -0
  424. NLPPlus/data/rfb/spec/finalerr.nlp +22 -0
  425. NLPPlus/data/rfb/spec/gram1.nlp +50 -0
  426. NLPPlus/data/rfb/spec/gram2-saf.nlp +250 -0
  427. NLPPlus/data/rfb/spec/gram2.nlp +484 -0
  428. NLPPlus/data/rfb/spec/gram3.nlp +11 -0
  429. NLPPlus/data/rfb/spec/gram4.nlp +51 -0
  430. NLPPlus/data/rfb/spec/gram5.nlp +99 -0
  431. NLPPlus/data/rfb/spec/list.nlp +30 -0
  432. NLPPlus/data/rfb/spec/list1.nlp +44 -0
  433. NLPPlus/data/rfb/spec/multi.nlp +15 -0
  434. NLPPlus/data/rfb/spec/nlppp.nlp +129 -0
  435. NLPPlus/data/rfb/spec/nodes.nlp +15 -0
  436. NLPPlus/data/rfb/spec/pair.nlp +41 -0
  437. NLPPlus/data/rfb/spec/pairs.nlp +28 -0
  438. NLPPlus/data/rfb/spec/path.nlp +15 -0
  439. NLPPlus/data/rfb/spec/posts.nlp +16 -0
  440. NLPPlus/data/rfb/spec/preaction.nlp +16 -0
  441. NLPPlus/data/rfb/spec/pres.nlp +17 -0
  442. NLPPlus/data/rfb/spec/recurse.nlp +15 -0
  443. NLPPlus/data/rfb/spec/recurses.nlp +15 -0
  444. NLPPlus/data/rfb/spec/region.nlp +16 -0
  445. NLPPlus/data/rfb/spec/regions.nlp +15 -0
  446. NLPPlus/data/rfb/spec/retok.nlp +78 -0
  447. NLPPlus/data/rfb/spec/rule.nlp +80 -0
  448. NLPPlus/data/rfb/spec/rules.nlp +17 -0
  449. NLPPlus/data/rfb/spec/rulesfile.nlp +29 -0
  450. NLPPlus/data/rfb/spec/select.nlp +17 -0
  451. NLPPlus/data/rfb/spec/tmp.nlp +22 -0
  452. NLPPlus/data/rfb/spec/tmp.seq +36 -0
  453. NLPPlus/data/rfb/spec/tmp1.nlp +16 -0
  454. NLPPlus/data/rfb/spec/un_mark.nlp +16 -0
  455. NLPPlus/data/rfb/spec/x_commas.nlp +15 -0
  456. NLPPlus/data/rfb/spec/x_white.nlp +15 -0
  457. NLPPlus/data/rfb/spec/xvar.nlp +21 -0
  458. nlpplus-2.0.22.dist-info/DELVEWHEEL +2 -0
  459. nlpplus-2.0.22.dist-info/METADATA +476 -0
  460. nlpplus-2.0.22.dist-info/RECORD +465 -0
  461. nlpplus-2.0.22.dist-info/WHEEL +5 -0
  462. nlpplus-2.0.22.dist-info/licenses/LICENSE +21 -0
  463. nlpplus.libs/icudt78-ef378328df9c3fcff89cc09bc1ae7038.dll +0 -0
  464. nlpplus.libs/icuuc78-a9fab54320205de057cb60aeb15f5668.dll +0 -0
  465. nlpplus.libs/msvcp140-a4c2229bdc2a2a630acdc095b4d86008.dll +0 -0
NLPPlus/__init__.py ADDED
@@ -0,0 +1,414 @@
1
+ """Python extension for NLP++ text analysis engine.
2
+
3
+ Basic usage:
4
+
5
+ import NLPPlus
6
+ xml = NLPPlus.analyze("This is some text to be parsed")
7
+ print(xml)
8
+
9
+ """
10
+
11
+
12
+ # start delvewheel patch
13
+ def _delvewheel_patch_1_13_0():
14
+ import os
15
+ if os.path.isdir(libs_dir := os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'nlpplus.libs'))):
16
+ os.add_dll_directory(libs_dir)
17
+
18
+
19
+ _delvewheel_patch_1_13_0()
20
+ del _delvewheel_patch_1_13_0
21
+ # end delvewheel patch
22
+
23
+ import json
24
+ import logging
25
+ from shutil import copytree, rmtree
26
+ from tempfile import TemporaryDirectory
27
+ from os import PathLike, getcwd
28
+ from pathlib import Path
29
+ from typing import Optional, Any
30
+ import os
31
+ import glob
32
+
33
+ from .bindings import NLP_ENGINE # type: ignore
34
+
35
+ LOGGER = logging.getLogger("NLPPlus")
36
+
37
+
38
def maybe_readfile(path: Path) -> Optional[str]:
    """Return the text of ``path``, or ``None`` when the file does not exist.

    The file is decoded as UTF-8 explicitly, matching ``Engine.input_text``.
    Relying on the platform default would decode with the ANSI code page on
    Windows and silently corrupt non-ASCII analyzer output.

    Args:
        path: file to read.

    Returns:
        The file contents, or ``None`` if ``path`` is missing.
    """
    try:
        # EAFP: attempt the read instead of exists()-then-open, so a file
        # removed between the check and the open cannot raise.
        return path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # NotADirectoryError covers a parent component that is a regular
        # file; Path.exists() reported that case as "missing" as well.
        return None
44
+
45
+
46
class EngineException(Exception):
    """Error raised by this package for engine / working-folder problems.

    Derives from ``Exception`` rather than ``BaseException`` so that ordinary
    ``except Exception`` handlers see it. ``BaseException`` is reserved for
    interpreter-level exits such as ``KeyboardInterrupt`` and ``SystemExit``.
    Code that catches ``EngineException`` directly is unaffected.
    """
48
+
49
+
50
class Results:
    """Outputs of a single NLP++ analyzer run.

    ``output_text`` is the text handed back by the engine call and ``outdir``
    is the analyzer's output directory. The file-backed properties read from
    ``outdir`` each time they are accessed; nothing is cached.
    """

    def __init__(self, outtext: str, outdir: PathLike):
        LOGGER.info("Reading output from %s", outdir)
        self.outdir = Path(outdir)
        self.output_text = outtext

    @property
    def final_tree(self) -> Optional[str]:
        """Text of ``final.tree`` in the output directory, or None if absent."""
        tree_file = self.outdir / "final.tree"
        return maybe_readfile(tree_file)

    @property
    def output_json(self) -> Optional[str]:
        """Raw text of ``output.json`` in the output directory, or None if absent."""
        json_file = self.outdir / "output.json"
        return maybe_readfile(json_file)

    @property
    def output(self) -> Optional[Any]:
        """``output.json`` parsed with ``json.loads``, or None if absent."""
        raw = self.output_json
        return None if raw is None else json.loads(raw)
75
+
76
+
77
class Engine:
    """NLP++ Engine for a given working folder.

    Args:

        working_folder(optional, PathLike): Working folder for this
            instance.  If None, a temporary directory will be created
            and initialized with the default analyzers.  Otherwise,
            this must contain an `analyzers` and a `data` folder,
            unless `initialize` is `True`.
        analyzer_path(optional, str): Folder that directly contains the
            analyzer directories (``<analyzer_path>/<analyzer_name>``).
            Same effect as calling :meth:`set_analyzers_folder` after
            construction.  If None, analyzers are looked up under
            ``<working_folder>/analyzers``.
        verbose(optional, bool): Be more verbose.
        initialize(optional, bool): Initialize `working_folder` with
            the default analyzers.

    Raises:
        EngineException: if the working folder lacks an `analyzers` or a
            `data` directory.
    """

    def __init__(
        self,
        working_folder: Optional[PathLike] = None,
        analyzer_path: Optional[str] = None,
        verbose: bool = False,
        initialize: bool = False,
    ):
        self._closed = False
        if working_folder is None:
            # ignore_cleanup_errors=True is now defense-in-depth: close()
            # / __exit__ / __del__ explicitly call self.engine.close()
            # before TemporaryDirectory.cleanup, which closes the engine's
            # cgerr.log handle and lets Windows delete the temp dir
            # cleanly. The flag still catches the corner case of an
            # interpreter shutdown where __del__ never runs (e.g. crash,
            # os._exit) — the OS reclaims the tempdir at process exit
            # regardless, so swallowing the cleanup error keeps stderr
            # quiet for users who never explicitly close. Engine-side
            # fix shipped in NLP-ENGINE-523 (engine v3.1.55+).
            self.tmpdir = TemporaryDirectory(
                prefix="NLPPlus-", ignore_cleanup_errors=True
            )
            self.working_folder = Path(self.tmpdir.name)
            initialize = True
        else:
            self.tmpdir = None
            self.working_folder = Path(working_folder)
        # Honor the constructor argument; it used to be accepted and then
        # unconditionally overwritten with None.
        self.analyzer_path = analyzer_path
        if initialize:
            package_dir = Path(__file__).parent
            copytree(package_dir / "analyzers", self.working_folder / "analyzers")
            copytree(package_dir / "data", self.working_folder / "data")
            LOGGER.info("Initialized working folder in %s", self.working_folder)
        if not (self.working_folder / "analyzers").is_dir():
            raise EngineException(
                f"analyzers directory not found in folder '{working_folder}'"
            )
        if not (self.working_folder / "data").is_dir():
            raise EngineException(
                f"data directory not found in folder '{working_folder}'"
            )
        self.engine = NLP_ENGINE(str(self.working_folder), silent=not verbose)

    def _analyzer_dir(self, analyzer_name) -> Path:
        """Return the on-disk directory of ``analyzer_name``.

        With an analyzers folder set, analyzers live directly under it.
        That is where :meth:`copy_library_analyzers` puts them and the path
        handed to the engine. Otherwise they live under
        ``<working_folder>/analyzers``.
        """
        if self.analyzer_path:
            return Path(self.analyzer_path) / analyzer_name
        return self.working_folder / "analyzers" / analyzer_name

    def close(self):
        """Tear down the underlying engine and release the working folder.

        Idempotent: safe to call multiple times. After ``close()``, any
        call to :meth:`analyze`, :meth:`compile`, or :meth:`cloud_compile`
        is undefined behavior — create a new ``Engine`` instead.

        On Windows in particular, calling ``close()`` (or using ``Engine``
        as a context manager) is what makes the auto-created
        ``TemporaryDirectory`` working folder delete cleanly: the engine
        keeps a file handle on ``<workfolder>/logs/cgerr.log`` open for
        the lifetime of the C++ instance, and Windows refuses to delete
        a directory that contains an open file. ``close()`` calls into
        the C++ engine's ``close()`` (NLP-ENGINE-523, engine v3.1.55+),
        which releases that handle before the tempdir is removed.
        """
        if self._closed:
            return
        self._closed = True
        # Engine.close() is idempotent on the C++ side as of engine
        # v3.1.55; older engines just no-op the second teardown.
        try:
            self.engine.close()
        except AttributeError:
            # Pre-3.1.55 binding without the close() method exposed;
            # fall back to letting __del__ tear it down. The tempdir
            # cleanup below will still hit the PermissionError on
            # Windows in that case — same situation as before 2.0.4.
            # Also reached when __init__ failed before self.engine was
            # assigned.
            pass
        if self.tmpdir is not None:
            self.tmpdir.cleanup()
            self.tmpdir = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def __del__(self):
        # Best-effort: __del__ may run during interpreter shutdown when
        # modules are being torn down and the bindings module may already
        # be gone. Swallow anything that goes wrong here; the explicit
        # close() / context-manager paths are the supported way to get
        # deterministic cleanup.
        try:
            self.close()
        except Exception:
            pass

    def analyze(self, text: str, analyzer_name: str, develop: bool = False,
                compiled: bool = False) -> "Results":
        """Analyze text with the named analyzer.

        Files left in the analyzer's ``output`` directory by a previous run
        are deleted before the engine is invoked.

        Args:
            text: input text to analyze.
            analyzer_name: name of the analyzer under the working folder.
            develop: if True, the engine emits intermediate log/tree files
                     into the analyzer's `_log` directory.
            compiled: if True, the engine loads the analyzer's compiled
                      shared libraries (``bin/run.<ext>`` for the analyzer
                      body and ``bin/kb.<ext>`` for the compiled KB)
                      instead of running interpreted from the ``.nlp``
                      source. See :meth:`compile` to produce the
                      generated C++ sources for those libraries, and the
                      package README for the cmake / cloud build step
                      that turns them into the actual ``.so``/``.dylib``/
                      ``.dll`` files.

        Returns:
            A ``Results`` wrapping the engine's output text and the
            analyzer's output directory.
        """
        analyzer_rel = Path(analyzer_name)
        analyzer_dir = self._analyzer_dir(analyzer_rel)
        # The output directory is derived from the analyzer's own directory.
        # It used to be built as
        # analyzer_path / "analyzers" / (analyzer_path / name) / "output",
        # which repeated analyzer_path for relative paths and was only right
        # for absolute ones because the absolute right-hand operand wins.
        outdir = analyzer_dir / "output"
        # The engine gets the bare analyzer name unless an explicit
        # analyzers folder is in effect.
        engine_arg = analyzer_dir if self.analyzer_path else analyzer_rel
        # Delete stale files in the outdir. The directory part is escaped so
        # glob metacharacters in the path are taken literally, and
        # sub-directories are skipped because os.remove() cannot delete them.
        pattern = os.path.join(glob.escape(str(outdir)), "*")
        for file_path in glob.glob(pattern):
            if os.path.isdir(file_path) and not os.path.islink(file_path):
                continue
            os.remove(file_path)
        outtext = self.engine.analyze(str(engine_arg), text, develop,
                                      compiled)
        return Results(outtext, outdir)

    def compile(self, analyzer_name: str, develop: bool = False,
                kb_only: bool = False, analyzer_only: bool = False) -> Path:
        """Generate C++ source files for the named analyzer.

        Runs the engine in ``-COMPILE`` mode, or ``-COMPILEKB`` if
        ``kb_only=True`` (KB only), or ``-COMPILEANA`` if
        ``analyzer_only=True`` (analyzer rules only, skipping the KB).
        ``-COMPILE`` emits the analyzer body under ``<analyzer>/run/``
        and the knowledge base under ``<analyzer>/kb/``; ``-COMPILEKB``
        emits just ``<analyzer>/kb/``; ``-COMPILEANA`` emits just
        ``<analyzer>/run/``. Returns the analyzer directory containing
        those generated trees.

        Use ``analyzer_only=True`` when only the rules changed and the
        KB is already compiled. ``kb_only`` and ``analyzer_only`` are
        mutually exclusive.

        The generated C++ still needs to be built into shared
        libraries before :meth:`analyze` can load them with
        ``compiled=True``. Use :meth:`cloud_compile` to do the build
        step via the public nlp-compile-service in one call.

        Raises:
            ValueError: if both ``kb_only`` and ``analyzer_only`` are set.
        """
        if kb_only and analyzer_only:
            raise ValueError("compile: kb_only and analyzer_only are mutually exclusive")
        analyzer_rel = Path(analyzer_name)
        # With an analyzers folder set, the returned directory is the same
        # path the engine is pointed at. It used to carry an extra
        # "analyzers" component that the engine argument did not have.
        # NOTE(review): confirm cloud.cloud_compile expects this layout.
        analyzer_dir = self._analyzer_dir(analyzer_rel)
        engine_arg = str(analyzer_dir if self.analyzer_path else analyzer_rel)
        self.engine.compile(engine_arg, develop, kb_only, analyzer_only)
        return analyzer_dir

    def cloud_compile(self, analyzer_name: str,
                      dispatcher_url: Optional[str] = None,
                      kb_only: bool = False,
                      analyzer_only: bool = False,
                      develop: bool = False,
                      poll_interval: float = 2.0,
                      timeout: float = 30 * 60,
                      skip_local_compile: bool = False) -> Path:
        """End-to-end compile: codegen + cloud build + stage into bin/.

        Runs :meth:`compile` to produce the analyzer's ``run/`` + ``kb/``
        C++ trees (unless ``skip_local_compile=True``), packages them
        plus an auto-generated ``StdAfx.h`` stub into a tarball, submits
        that tarball to the public nlp-compile-service dispatcher, polls
        for the GitHub-Actions runner build to complete, downloads the
        resulting shared library, and stages it into
        ``<analyzer>/bin/`` as ``run.<ext>`` and ``kb.<ext>`` (and the
        Windows ``runu.<ext>`` / ``kbu.<ext>`` variants). After this
        returns, :meth:`analyze` with ``compiled=True`` will load the
        compiled artifact.

        Returns the ``bin/`` directory path.

        Args:
            analyzer_name: analyzer under the engine's working folder.
            dispatcher_url: override the public dispatcher endpoint
                (default: ``cloud.DEFAULT_DISPATCHER_URL``).
            kb_only: compile only the KB.
            analyzer_only: compile only the analyzer rules (skip the KB).
                Mutually exclusive with ``kb_only``.
            develop: forwarded to local ``-COMPILE``.
            poll_interval: seconds between job-status checks.
            timeout: max seconds to wait for the runner build.
            skip_local_compile: if True, assume ``run/`` and ``kb/``
                already exist under the analyzer dir.
        """
        # Import here so the rest of the package keeps working in
        # environments that don't have an `urllib`-friendly TLS stack.
        from . import cloud
        return cloud.cloud_compile(
            self, analyzer_name,
            dispatcher_url=dispatcher_url or cloud.DEFAULT_DISPATCHER_URL,
            kb_only=kb_only, analyzer_only=analyzer_only, develop=develop,
            poll_interval=poll_interval, timeout=timeout,
            skip_local_compile=skip_local_compile,
        )

    def input_text(self, analyzer_name: str, file_name: str) -> str:
        """Return the text from a file in the analyzer's input directory.

        Falls back to ``<working_folder>/analyzers`` when no analyzers
        folder has been set, instead of failing on ``Path(None)``.

        Raises:
            EngineException: if the file does not exist.
        """
        file_path = self._analyzer_dir(analyzer_name) / "input" / file_name
        if not file_path.is_file():
            raise EngineException(
                f"File not found in input directory '{file_path}'"
            )
        with open(file_path, "rt", encoding="utf-8") as file:
            return file.read()

    def set_analyzers_folder(self, analyzer_name: str):
        """Set analyzers directory path."""
        self.analyzer_path = analyzer_name

    def copy_library_analyzers(self, to_dir: str, overwrite: bool = True):
        """Copy the packaged analyzers to ``to_dir`` and use it from now on.

        An existing ``to_dir`` is removed and re-copied when ``overwrite``
        is true, and left untouched otherwise. In both cases ``to_dir``
        becomes this engine's analyzers folder.
        """
        target = Path(to_dir)
        if overwrite and target.exists():
            rmtree(target)
        if not target.exists():
            copytree(Path(__file__).parent / "analyzers", target)
        self.analyzer_path = str(to_dir)
333
+
334
+
335
+ engine = Engine()
336
+
337
+
338
def set_working_folder(working_folder: Optional[str] = None, initialize: bool = False):
    """Replace the module-level engine with one bound to another folder.

    Args:

        working_folder(str): Folder the new engine should work in. `None`
                             selects the current working directory.
        initialize(bool): Populate the folder with the built-in analyzers
                          and data before use. (Optional, default=False)
    """
    global engine
    folder = getcwd() if working_folder is None else working_folder
    engine = Engine(Path(folder), initialize=initialize)
352
+
353
+
354
def copy_library_analyzers(analyzer_folder_path: str, overwrite: bool = True) -> None:
    """Copy the packaged analyzers into ``analyzer_folder_path``.

    Delegates to ``copy_library_analyzers`` on the module-level engine.

    Args:
        analyzer_folder_path: destination directory for the analyzers.
        overwrite: forwarded to the engine method as its overwrite flag.
    """
    engine.copy_library_analyzers(analyzer_folder_path, overwrite)
357
+
358
+
359
def set_analyzers_folder(analyzer_folder_path: str) -> None:
    """Point the module-level engine at a different analyzers folder.

    Delegates to ``set_analyzers_folder`` on the module-level engine.

    Args:
        analyzer_folder_path: directory to use as the analyzers folder.
    """
    engine.set_analyzers_folder(analyzer_folder_path)
362
+
363
+
364
def analyze(text: str, parser: str = "parse-en-us", develop: bool = False,
            compiled: bool = False) -> str:
    """Analyze ``text`` with the analyzer ``parser`` and return its output text.

    With ``compiled=True`` the engine loads the analyzer's compiled shared
    libraries (``bin/run.<ext>`` and ``bin/kb.<ext>``) rather than running
    interpreted. See :func:`compile` for producing those.
    """
    results = engine.analyze(text, parser, develop=develop, compiled=compiled)
    return results.output_text
373
+
374
+
375
def compile(analyzer: str = "parse-en-us", develop: bool = False,
            kb_only: bool = False, analyzer_only: bool = False):
    """Generate C++ source files for the named analyzer.

    Thin wrapper over :meth:`Engine.compile` on the module-level engine.
    The generated trees land under ``<analyzer>/run/`` and
    ``<analyzer>/kb/`` inside the engine's working folder (only ``kb/``
    with ``kb_only``, only ``run/`` with ``analyzer_only``). They must
    still be built into shared libraries before :func:`analyze` can load
    them with ``compiled=True``.
    """
    analyzer_dir = engine.compile(
        analyzer,
        develop=develop,
        kb_only=kb_only,
        analyzer_only=analyzer_only,
    )
    return analyzer_dir
387
+
388
+
389
def cloud_compile(analyzer: str = "parse-en-us",
                  dispatcher_url: Optional[str] = None,
                  kb_only: bool = False,
                  analyzer_only: bool = False,
                  develop: bool = False,
                  poll_interval: float = 2.0,
                  timeout: float = 30 * 60,
                  skip_local_compile: bool = False):
    """Compile an analyzer end-to-end via the public nlp-compile-service.

    Thin wrapper over :meth:`Engine.cloud_compile` on the module-level
    engine; that method carries the full parameter documentation. Once
    this returns, ``analyze(..., compiled=True)`` picks up the staged
    shared libraries from the analyzer's ``bin/`` directory.
    """
    options = {
        "dispatcher_url": dispatcher_url,
        "kb_only": kb_only,
        "analyzer_only": analyzer_only,
        "develop": develop,
        "poll_interval": poll_interval,
        "timeout": timeout,
        "skip_local_compile": skip_local_compile,
    }
    return engine.cloud_compile(analyzer, **options)
410
+
411
+
412
def input_text(analyzer_name: str, file_name: str) -> str:
    """Return the text from a file in the analyzer's input directory.

    Delegates to ``input_text`` on the module-level engine. The call used
    to be misspelled ``intput_text``, so every invocation raised
    ``AttributeError``.

    Args:
        analyzer_name: analyzer whose ``input`` directory is searched.
        file_name: file inside that directory.
    """
    return engine.input_text(analyzer_name, file_name)
NLPPlus/_version.py ADDED
@@ -0,0 +1,24 @@
1
+ # file generated by vcs-versioning
2
+ # don't change, don't track in version control
3
+ from __future__ import annotations
4
+
5
+ __all__ = [
6
+ "__version__",
7
+ "__version_tuple__",
8
+ "version",
9
+ "version_tuple",
10
+ "__commit_id__",
11
+ "commit_id",
12
+ ]
13
+
14
+ version: str
15
+ __version__: str
16
+ __version_tuple__: tuple[int | str, ...]
17
+ version_tuple: tuple[int | str, ...]
18
+ commit_id: str | None
19
+ __commit_id__: str | None
20
+
21
+ __version__ = version = '2.0.22'
22
+ __version_tuple__ = version_tuple = (2, 0, 22)
23
+
24
+ __commit_id__ = commit_id = 'g756328b5f'
@@ -0,0 +1,40 @@
1
+ # Install at: VisualText/package-analyzers -> .github/workflows/dispatch-update-package-analyzers.yml
2
+ #
3
+ # Sender in the cross-repo percolation chain (see
4
+ # VisualText/nlp-engine/docs/PERCOLATION.md).
5
+ #
6
+ # When a release tag (v*) is pushed here, ping the repos that embed
7
+ # package-analyzers as a submodule so they refresh their pointer. Both the npm
8
+ # and Python nlpengine packages listen for the `package-analyzers-release` event
9
+ # and open a bump PR (they publish to npm/PyPI with their own versions, so a
10
+ # submodule bump must not auto-publish — a human merges the PR).
11
+ #
12
+ # Requires the shared `CLASSIC_PAT` secret in this repo (the default
13
+ # GITHUB_TOKEN cannot trigger workflows in other repos).
14
+
15
+ name: Dispatch update-package-analyzers
16
+
17
+ on:
18
+ push:
19
+ tags:
20
+ - 'v*'
21
+ workflow_dispatch:
22
+
23
+ jobs:
24
+ dispatch:
25
+ runs-on: ubuntu-latest
26
+ strategy:
27
+ fail-fast: false
28
+ matrix:
29
+ repo:
30
+ - VisualText/npm-package-nlpengine # embeds package-analyzers at path analyzers
31
+ - VisualText/py-package-nlpengine # embeds package-analyzers at path NLPPlus/analyzers
32
+ steps:
33
+ - name: Trigger ${{ matrix.repo }}
34
+ uses: peter-evans/repository-dispatch@v2
35
+ with:
36
+ token: ${{ secrets.CLASSIC_PAT }}
37
+ repository: ${{ matrix.repo }}
38
+ event-type: package-analyzers-release
39
+ client-payload: '{"tag_name": "${{ github.ref_name }}"}'
40
+ continue-on-error: true
@@ -0,0 +1,86 @@
1
+ # Install at: VisualText/package-analyzers -> .github/workflows/tag-on-push.yml
2
+ #
3
+ # Source of the percolation chain for DIRECT analyzer edits (see
4
+ # VisualText/nlp-engine/docs/PERCOLATION.md).
5
+ #
6
+ # When you edit an analyzer (address-parser, emailaddress, links, telephone) and
7
+ # push to main, this auto-computes the next semver tag and pushes it. Pushing the
8
+ # v* tag fires dispatch-update-package-analyzers.yml, which pings the npm and
9
+ # Python packages to open a submodule-bump PR (a human merges it; no auto-publish).
10
+ #
11
+ # parse-en-us is intentionally NOT in the path filter: parse-en-us changes arrive
12
+ # via update-parse-en-us.yml (driven by parse-en-us-release), which tags itself.
13
+ # Keeping the two paths disjoint avoids double-tagging.
14
+ #
15
+ # Requires the shared CLASSIC_PAT secret (a GITHUB_TOKEN-pushed tag cannot
16
+ # trigger dispatch-update-package-analyzers.yml in turn).
17
+
18
+ name: Tag on push
19
+
20
+ on:
21
+ push:
22
+ branches: [main]
23
+ paths:
24
+ - 'address-parser/**'
25
+ - 'emailaddress/**'
26
+ - 'links/**'
27
+ - 'telephone/**'
28
+ workflow_dispatch:
29
+ inputs:
30
+ bump:
31
+ description: "Semver component to bump"
32
+ required: true
33
+ default: "patch"
34
+ type: choice
35
+ options: [patch, minor, major]
36
+
37
+ permissions:
38
+ contents: write
39
+
40
+ jobs:
41
+ tag:
42
+ runs-on: ubuntu-latest
43
+ steps:
44
+ - uses: actions/checkout@v4
45
+ with:
46
+ fetch-depth: 0
47
+ token: ${{ secrets.CLASSIC_PAT }}
48
+
49
+ - name: Configure git identity
50
+ run: |
51
+ git config user.name 'github-actions[bot]'
52
+ git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
53
+
54
+ - name: Compute next version
55
+ id: version
56
+ run: |
57
+ set -euo pipefail
58
+ bump="${{ inputs.bump }}"
59
+ bump="${bump:-patch}" # push events have no inputs
60
+ latest=$(git tag --sort=-v:refname \
61
+ | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+$' \
62
+ | head -n1 || true)
63
+ latest=${latest:-v0.0.0}
64
+ IFS='.' read -r major minor patch <<<"${latest#v}"
65
+ case "$bump" in
66
+ major) major=$((major + 1)); minor=0; patch=0 ;;
67
+ minor) minor=$((minor + 1)); patch=0 ;;
68
+ patch) patch=$((patch + 1)) ;;
69
+ esac
70
+ new="v${major}.${minor}.${patch}"
71
+ echo "new=$new" >> "$GITHUB_OUTPUT"
72
+ echo "Tagging $latest -> $new"
73
+
74
+ - name: Tag and push
75
+ run: |
76
+ set -euo pipefail
77
+ git tag -a "${{ steps.version.outputs.new }}" -m "Release ${{ steps.version.outputs.new }}"
78
+ git push origin "${{ steps.version.outputs.new }}"
79
+
80
+ - name: Create GitHub release
81
+ uses: softprops/action-gh-release@v2
82
+ with:
83
+ tag_name: ${{ steps.version.outputs.new }}
84
+ name: ${{ steps.version.outputs.new }}
85
+ generate_release_notes: true
86
+ token: ${{ secrets.CLASSIC_PAT || secrets.GITHUB_TOKEN }}
@@ -0,0 +1,130 @@
1
+ # Install at: VisualText/package-analyzers -> .github/workflows/update-parse-en-us.yml
2
+ #
3
+ # Listener in the cross-repo percolation chain (see
4
+ # VisualText/nlp-engine/docs/PERCOLATION.md).
5
+ #
6
+ # 1. Pulls the latest commit of the parse-en-us submodule's default branch (main).
7
+ # 2. Computes a new semver tag by bumping the latest vX.Y.Z tag.
8
+ # 3. Commits the submodule pointer change, tags, and pushes.
9
+ # 4. Cuts a GitHub release at the new tag. Pushing the v* tag fires
10
+ # dispatch-update-package-analyzers.yml, which pings the npm and Python
11
+ # packages to refresh their analyzers submodule.
12
+ #
13
+ # Triggered by parse-en-us (event-type parse-en-us-release) on its release, or
14
+ # by hand. A repository_dispatch ping carries no inputs, so the bump defaults to
15
+ # patch.
16
+
17
+ name: Update parse-en-us & Bump Version
18
+
19
+ on:
20
+ workflow_dispatch:
21
+ inputs:
22
+ bump:
23
+ description: "Semver component to bump"
24
+ required: true
25
+ default: "patch"
26
+ type: choice
27
+ options:
28
+ - patch
29
+ - minor
30
+ - major
31
+ create_release:
32
+ description: "Create a GitHub release at the new tag"
33
+ required: true
34
+ default: true
35
+ type: boolean
36
+ repository_dispatch:
37
+ types: [parse-en-us-release]
38
+
39
+ permissions:
40
+ contents: write
41
+
42
+ jobs:
43
+ update-and-bump:
44
+ runs-on: ubuntu-latest
45
+ steps:
46
+ - name: Checkout (with submodules and full history)
47
+ uses: actions/checkout@v4
48
+ with:
49
+ fetch-depth: 0
50
+ submodules: recursive
51
+ token: ${{ secrets.CLASSIC_PAT || secrets.GITHUB_TOKEN }}
52
+
53
+ - name: Configure git identity
54
+ run: |
55
+ git config user.name 'github-actions[bot]'
56
+ git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
57
+
58
+ - name: Update parse-en-us to latest main
59
+ id: submodule
60
+ run: |
61
+ set -euo pipefail
62
+ before=$(git -C parse-en-us rev-parse HEAD)
63
+ git -C parse-en-us fetch origin main
64
+ git -C parse-en-us checkout origin/main
65
+ git -C parse-en-us submodule update --init --recursive
66
+ after=$(git -C parse-en-us rev-parse HEAD)
67
+ echo "before=$before" >> "$GITHUB_OUTPUT"
68
+ echo "after=$after" >> "$GITHUB_OUTPUT"
69
+ if [ "$before" = "$after" ]; then
70
+ echo "changed=false" >> "$GITHUB_OUTPUT"
71
+ echo "parse-en-us already at $after"
72
+ else
73
+ echo "changed=true" >> "$GITHUB_OUTPUT"
74
+ echo "parse-en-us: $before -> $after"
75
+ fi
76
+
77
+ - name: Compute next version
78
+ id: version
79
+ run: |
80
+ set -euo pipefail
81
+ bump="${{ inputs.bump }}"
82
+ bump="${bump:-patch}" # repository_dispatch has no inputs
83
+ latest=$(git tag --sort=-v:refname \
84
+ | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+$' \
85
+ | head -n1 || true)
86
+ latest=${latest:-v0.0.0}
87
+ IFS='.' read -r major minor patch <<<"${latest#v}"
88
+ case "$bump" in
89
+ major) major=$((major + 1)); minor=0; patch=0 ;;
90
+ minor) minor=$((minor + 1)); patch=0 ;;
91
+ patch) patch=$((patch + 1)) ;;
92
+ esac
93
+ new="v${major}.${minor}.${patch}"
94
+ echo "previous=$latest" >> "$GITHUB_OUTPUT"
95
+ echo "new=$new" >> "$GITHUB_OUTPUT"
96
+ echo "Bumping $latest -> $new"
97
+
98
+ - name: Commit submodule update
99
+ if: steps.submodule.outputs.changed == 'true'
100
+ run: |
101
+ git add parse-en-us
102
+ git commit -m "Update parse-en-us to ${{ steps.submodule.outputs.after }} (${{ steps.version.outputs.new }})"
103
+
104
+ - name: Tag and push
105
+ run: |
106
+ set -euo pipefail
107
+ git tag -a "${{ steps.version.outputs.new }}" \
108
+ -m "Release ${{ steps.version.outputs.new }}"
109
+ git push origin HEAD
110
+ git push origin "${{ steps.version.outputs.new }}"
111
+
112
+ - name: Create GitHub release
113
+ if: ${{ github.event_name != 'workflow_dispatch' || inputs.create_release }}
114
+ uses: softprops/action-gh-release@v2
115
+ with:
116
+ tag_name: ${{ steps.version.outputs.new }}
117
+ name: ${{ steps.version.outputs.new }}
118
+ generate_release_notes: true
119
+ token: ${{ secrets.CLASSIC_PAT || secrets.GITHUB_TOKEN }}
120
+
121
+ - name: Summary
122
+ run: |
123
+ {
124
+ echo "### Update parse-en-us & bump version"
125
+ echo ""
126
+ echo "- previous tag: \`${{ steps.version.outputs.previous }}\`"
127
+ echo "- new tag: \`${{ steps.version.outputs.new }}\`"
128
+ echo "- parse-en-us changed: \`${{ steps.submodule.outputs.changed }}\`"
129
+ echo "- parse-en-us: \`${{ steps.submodule.outputs.before }}\` → \`${{ steps.submodule.outputs.after }}\`"
130
+ } >> "$GITHUB_STEP_SUMMARY"