PyPI - NLPPlus - Versions diffs - 2.0.22__cp313-cp313-win_amd64.whl - Mend

NLPPlus 2.0.22__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (465) hide show

NLPPlus/__init__.py +414 -0
NLPPlus/_version.py +24 -0
NLPPlus/analyzers/.github/workflows/dispatch-update-package-analyzers.yml +40 -0
NLPPlus/analyzers/.github/workflows/tag-on-push.yml +86 -0
NLPPlus/analyzers/.github/workflows/update-parse-en-us.yml +130 -0
NLPPlus/analyzers/.gitignore +5 -0
NLPPlus/analyzers/.gitmodules +3 -0
NLPPlus/analyzers/README.md +90 -0
NLPPlus/analyzers/address-parser/README.md +146 -0
NLPPlus/analyzers/address-parser/input/address-texts/01-contact.html +12 -0
NLPPlus/analyzers/address-parser/input/address-texts/02-store-locator.html +14 -0
NLPPlus/analyzers/address-parser/input/address-texts/03-realestate.html +19 -0
NLPPlus/analyzers/address-parser/input/address-texts/04-clinic.html +11 -0
NLPPlus/analyzers/address-parser/input/address-texts/05-university.html +11 -0
NLPPlus/analyzers/address-parser/input/address-texts/06-law-firm.html +11 -0
NLPPlus/analyzers/address-parser/input/address-texts/07-gov.html +11 -0
NLPPlus/analyzers/address-parser/input/address-texts/08-hotel.html +11 -0
NLPPlus/analyzers/address-parser/input/address-texts/09-museum.html +11 -0
NLPPlus/analyzers/address-parser/input/address-texts/10-bank.html +11 -0
NLPPlus/analyzers/address-parser/input/address-texts/11-readme.md +20 -0
NLPPlus/analyzers/address-parser/input/address-texts/12-directory.md +21 -0
NLPPlus/analyzers/address-parser/input/address-texts/13-event.md +21 -0
NLPPlus/analyzers/address-parser/input/address-texts/14-onboarding.md +24 -0
NLPPlus/analyzers/address-parser/input/address-texts/15-press-release.md +16 -0
NLPPlus/analyzers/address-parser/input/address-texts/16-faq.md +21 -0
NLPPlus/analyzers/address-parser/input/address-texts/17-tutorial.md +27 -0
NLPPlus/analyzers/address-parser/input/address-texts/18-suppliers.md +16 -0
NLPPlus/analyzers/address-parser/input/address-texts/19-shipping-policy.md +22 -0
NLPPlus/analyzers/address-parser/input/address-texts/20-mixed.md +17 -0
NLPPlus/analyzers/address-parser/input/address-texts/21-letter.txt +12 -0
NLPPlus/analyzers/address-parser/input/address-texts/22-address-book.txt +15 -0
NLPPlus/analyzers/address-parser/input/address-texts/23-shipping-label.txt +16 -0
NLPPlus/analyzers/address-parser/input/address-texts/24-classifieds.txt +14 -0
NLPPlus/analyzers/address-parser/input/address-texts/25-invoice.txt +16 -0
NLPPlus/analyzers/address-parser/input/address-texts/26-itinerary.txt +12 -0
NLPPlus/analyzers/address-parser/input/address-texts/27-resume.txt +14 -0
NLPPlus/analyzers/address-parser/input/address-texts/28-meeting-minutes.txt +11 -0
NLPPlus/analyzers/address-parser/input/address-texts/29-warranty.txt +14 -0
NLPPlus/analyzers/address-parser/input/address-texts/30-mixed-noise.txt +17 -0
NLPPlus/analyzers/address-parser/input/text.txt +24 -0
NLPPlus/analyzers/address-parser/kb/user/Country.dict +196 -0
NLPPlus/analyzers/address-parser/kb/user/address-synonym.dict +14 -0
NLPPlus/analyzers/address-parser/kb/user/attr.kb +254 -0
NLPPlus/analyzers/address-parser/kb/user/designator.dict +7 -0
NLPPlus/analyzers/address-parser/kb/user/directions.dict +16 -0
NLPPlus/analyzers/address-parser/kb/user/en-usa-states.dict +105 -0
NLPPlus/analyzers/address-parser/kb/user/en-usa-streetsuff.dict +548 -0
NLPPlus/analyzers/address-parser/kb/user/hier.kb +756 -0
NLPPlus/analyzers/address-parser/kb/user/military-address.dict +9 -0
NLPPlus/analyzers/address-parser/kb/user/phr.kb +3 -0
NLPPlus/analyzers/address-parser/kb/user/word.kb +57 -0
NLPPlus/analyzers/address-parser/spec/Changenumbers.nlp +17 -0
NLPPlus/analyzers/address-parser/spec/Grouping.nlp +17 -0
NLPPlus/analyzers/address-parser/spec/KBFuncs.nlp +694 -0
NLPPlus/analyzers/address-parser/spec/Lines.nlp +21 -0
NLPPlus/analyzers/address-parser/spec/PrecedingWords.nlp +40 -0
NLPPlus/analyzers/address-parser/spec/RemovePunct.nlp +15 -0
NLPPlus/analyzers/address-parser/spec/RemoveSpecialChars.nlp +30 -0
NLPPlus/analyzers/address-parser/spec/RemoveWhiteSpace.nlp +20 -0
NLPPlus/analyzers/address-parser/spec/analyzer.seq +18 -0
NLPPlus/analyzers/address-parser/spec/countryname.nlp +18 -0
NLPPlus/analyzers/address-parser/spec/funcs.nlp +141 -0
NLPPlus/analyzers/address-parser/spec/information.nlp +91 -0
NLPPlus/analyzers/address-parser/spec/information1.nlp +88 -0
NLPPlus/analyzers/address-parser/spec/information2.nlp +19 -0
NLPPlus/analyzers/address-parser/spec/information3.nlp +42 -0
NLPPlus/analyzers/address-parser/spec/kbdisp01.nlp +13 -0
NLPPlus/analyzers/address-parser/spec/kbinit.nlp +15 -0
NLPPlus/analyzers/address-parser/spec/kbmake.nlp +17 -0
NLPPlus/analyzers/address-parser/spec/output.nlp +13 -0
NLPPlus/analyzers/address-parser/spec/pincode.nlp +35 -0
NLPPlus/analyzers/address-parser/spec/removelines.nlp +17 -0
NLPPlus/analyzers/address-parser/tmp/README.md +1 -0
NLPPlus/analyzers/emailaddress/README.md +129 -0
NLPPlus/analyzers/emailaddress/input/email_texts/01-acme-contact.html +29 -0
NLPPlus/analyzers/emailaddress/input/email_texts/02-team-roster.html +20 -0
NLPPlus/analyzers/emailaddress/input/email_texts/03-startup-landing.html +23 -0
NLPPlus/analyzers/emailaddress/input/email_texts/04-university-dept.html +21 -0
NLPPlus/analyzers/emailaddress/input/email_texts/05-ecommerce-help.html +17 -0
NLPPlus/analyzers/emailaddress/input/email_texts/06-conference.html +17 -0
NLPPlus/analyzers/emailaddress/input/email_texts/07-blog-post.html +26 -0
NLPPlus/analyzers/emailaddress/input/email_texts/08-gov-services.html +15 -0
NLPPlus/analyzers/emailaddress/input/email_texts/09-forum-thread.html +17 -0
NLPPlus/analyzers/emailaddress/input/email_texts/10-newsletter.html +18 -0
NLPPlus/analyzers/emailaddress/input/email_texts/11-readme.md +20 -0
NLPPlus/analyzers/emailaddress/input/email_texts/12-meeting-notes.md +17 -0
NLPPlus/analyzers/emailaddress/input/email_texts/13-job-listing.md +21 -0
NLPPlus/analyzers/emailaddress/input/email_texts/14-changelog.md +19 -0
NLPPlus/analyzers/emailaddress/input/email_texts/15-faq.md +24 -0
NLPPlus/analyzers/emailaddress/input/email_texts/16-press-release.md +18 -0
NLPPlus/analyzers/emailaddress/input/email_texts/17-tutorial.md +23 -0
NLPPlus/analyzers/emailaddress/input/email_texts/18-product-docs.md +25 -0
NLPPlus/analyzers/emailaddress/input/email_texts/19-community-guidelines.md +23 -0
NLPPlus/analyzers/emailaddress/input/email_texts/20-recipe-blog.md +17 -0
NLPPlus/analyzers/emailaddress/input/email_texts/21-email-signature.txt +9 -0
NLPPlus/analyzers/emailaddress/input/email_texts/22-contact-list.txt +20 -0
NLPPlus/analyzers/emailaddress/input/email_texts/23-support-ticket.txt +17 -0
NLPPlus/analyzers/emailaddress/input/email_texts/24-classifieds.txt +15 -0
NLPPlus/analyzers/emailaddress/input/email_texts/25-meeting-minutes.txt +17 -0
NLPPlus/analyzers/emailaddress/input/email_texts/26-product-reviews.txt +16 -0
NLPPlus/analyzers/emailaddress/input/email_texts/27-event-invite.txt +16 -0
NLPPlus/analyzers/emailaddress/input/email_texts/28-bug-report.txt +19 -0
NLPPlus/analyzers/emailaddress/input/email_texts/29-travel-itinerary.txt +15 -0
NLPPlus/analyzers/emailaddress/input/email_texts/30-mixed-noise.txt +19 -0
NLPPlus/analyzers/emailaddress/input/email_variations.txt +155 -0
NLPPlus/analyzers/emailaddress/input/text.txt +30 -0
NLPPlus/analyzers/emailaddress/kb/user/attr.kb +254 -0
NLPPlus/analyzers/emailaddress/kb/user/charactders.dict +39 -0
NLPPlus/analyzers/emailaddress/kb/user/country.dict +265 -0
NLPPlus/analyzers/emailaddress/kb/user/domain.dict +1134 -0
NLPPlus/analyzers/emailaddress/kb/user/hier.kb +756 -0
NLPPlus/analyzers/emailaddress/kb/user/phr.kb +3 -0
NLPPlus/analyzers/emailaddress/kb/user/word.kb +57 -0
NLPPlus/analyzers/emailaddress/spec/EmailZone.nlp +31 -0
NLPPlus/analyzers/emailaddress/spec/EmailZoneAt.nlp +26 -0
NLPPlus/analyzers/emailaddress/spec/KBFuncs.nlp +694 -0
NLPPlus/analyzers/emailaddress/spec/Lines.nlp +21 -0
NLPPlus/analyzers/emailaddress/spec/analyzer.seq +20 -0
NLPPlus/analyzers/emailaddress/spec/comment.nlp +18 -0
NLPPlus/analyzers/emailaddress/spec/commentRemove.nlp +17 -0
NLPPlus/analyzers/emailaddress/spec/email0.nlp +109 -0
NLPPlus/analyzers/emailaddress/spec/email0at.nlp +134 -0
NLPPlus/analyzers/emailaddress/spec/email0z.nlp +18 -0
NLPPlus/analyzers/emailaddress/spec/email1.nlp +146 -0
NLPPlus/analyzers/emailaddress/spec/email11.nlp +62 -0
NLPPlus/analyzers/emailaddress/spec/email12.nlp +17 -0
NLPPlus/analyzers/emailaddress/spec/email2.nlp +17 -0
NLPPlus/analyzers/emailaddress/spec/email3.nlp +20 -0
NLPPlus/analyzers/emailaddress/spec/email4.nlp +20 -0
NLPPlus/analyzers/emailaddress/spec/email5.nlp +19 -0
NLPPlus/analyzers/emailaddress/spec/email6.nlp +31 -0
NLPPlus/analyzers/emailaddress/spec/email7.nlp +20 -0
NLPPlus/analyzers/emailaddress/spec/emailChars1.nlp +16 -0
NLPPlus/analyzers/emailaddress/spec/funcs.nlp +143 -0
NLPPlus/analyzers/emailaddress/spec/kbdisp01.nlp +13 -0
NLPPlus/analyzers/emailaddress/spec/kbinit.nlp +14 -0
NLPPlus/analyzers/emailaddress/spec/output.nlp +13 -0
NLPPlus/analyzers/emailaddress/tmp/README.md +1 -0
NLPPlus/analyzers/links/README.md +138 -0
NLPPlus/analyzers/links/input/links-texts/01-resources.html +18 -0
NLPPlus/analyzers/links/input/links-texts/02-news-portal.html +19 -0
NLPPlus/analyzers/links/input/links-texts/03-store.html +14 -0
NLPPlus/analyzers/links/input/links-texts/04-docs.html +16 -0
NLPPlus/analyzers/links/input/links-texts/05-university.html +15 -0
NLPPlus/analyzers/links/input/links-texts/06-portfolio.html +16 -0
NLPPlus/analyzers/links/input/links-texts/07-gov.html +14 -0
NLPPlus/analyzers/links/input/links-texts/08-recipes.html +15 -0
NLPPlus/analyzers/links/input/links-texts/09-conference.html +14 -0
NLPPlus/analyzers/links/input/links-texts/10-help-center.html +17 -0
NLPPlus/analyzers/links/input/links-texts/11-readme.md +22 -0
NLPPlus/analyzers/links/input/links-texts/12-link-roundup.md +20 -0
NLPPlus/analyzers/links/input/links-texts/13-contributing.md +23 -0
NLPPlus/analyzers/links/input/links-texts/14-changelog.md +19 -0
NLPPlus/analyzers/links/input/links-texts/15-awesome-list.md +24 -0
NLPPlus/analyzers/links/input/links-texts/16-tutorial.md +24 -0
NLPPlus/analyzers/links/input/links-texts/17-bookmarks.md +24 -0
NLPPlus/analyzers/links/input/links-texts/18-press-release.md +20 -0
NLPPlus/analyzers/links/input/links-texts/19-security-advisory.md +23 -0
NLPPlus/analyzers/links/input/links-texts/20-link-dump.md +20 -0
NLPPlus/analyzers/links/input/links-texts/21-signature.txt +9 -0
NLPPlus/analyzers/links/input/links-texts/22-bookmark-export.txt +15 -0
NLPPlus/analyzers/links/input/links-texts/23-support-ticket.txt +16 -0
NLPPlus/analyzers/links/input/links-texts/24-classifieds.txt +15 -0
NLPPlus/analyzers/links/input/links-texts/25-meeting-notes.txt +16 -0
NLPPlus/analyzers/links/input/links-texts/26-reviews.txt +15 -0
NLPPlus/analyzers/links/input/links-texts/27-itinerary.txt +14 -0
NLPPlus/analyzers/links/input/links-texts/28-bug-report.txt +18 -0
NLPPlus/analyzers/links/input/links-texts/29-syllabus.txt +15 -0
NLPPlus/analyzers/links/input/links-texts/30-mixed-noise.txt +19 -0
NLPPlus/analyzers/links/input/text.txt +19 -0
NLPPlus/analyzers/links/kb/user/attr.kb +254 -0
NLPPlus/analyzers/links/kb/user/country.dict +264 -0
NLPPlus/analyzers/links/kb/user/domain.dict +1134 -0
NLPPlus/analyzers/links/kb/user/hier.kb +756 -0
NLPPlus/analyzers/links/kb/user/phr.kb +3 -0
NLPPlus/analyzers/links/kb/user/schemelist.dict +39 -0
NLPPlus/analyzers/links/kb/user/word.kb +57 -0
NLPPlus/analyzers/links/spec/KBFuncs.nlp +694 -0
NLPPlus/analyzers/links/spec/Lines.nlp +21 -0
NLPPlus/analyzers/links/spec/Link1.nlp +17 -0
NLPPlus/analyzers/links/spec/Link2.nlp +27 -0
NLPPlus/analyzers/links/spec/Link3.nlp +16 -0
NLPPlus/analyzers/links/spec/Link4.nlp +32 -0
NLPPlus/analyzers/links/spec/Link5.nlp +115 -0
NLPPlus/analyzers/links/spec/Link6.nlp +76 -0
NLPPlus/analyzers/links/spec/Link7.nlp +24 -0
NLPPlus/analyzers/links/spec/Link8.nlp +12 -0
NLPPlus/analyzers/links/spec/LinkZone.nlp +46 -0
NLPPlus/analyzers/links/spec/Links.nlp +52 -0
NLPPlus/analyzers/links/spec/Links3.nlp +21 -0
NLPPlus/analyzers/links/spec/Links41.nlp +30 -0
NLPPlus/analyzers/links/spec/RemoveWhiteSpace.nlp +20 -0
NLPPlus/analyzers/links/spec/analyzer.seq +16 -0
NLPPlus/analyzers/links/spec/funcs.nlp +88 -0
NLPPlus/analyzers/links/spec/kbDisplay.nlp +13 -0
NLPPlus/analyzers/links/spec/kbdisp00.nlp +11 -0
NLPPlus/analyzers/links/spec/kbinit.nlp +13 -0
NLPPlus/analyzers/links/spec/links1.nlp +14 -0
NLPPlus/analyzers/links/spec/linksphno.nlp +34 -0
NLPPlus/analyzers/links/spec/output.nlp +13 -0
NLPPlus/analyzers/links/spec/removeblankline.nlp +15 -0
NLPPlus/analyzers/links/tmp/README.md +1 -0
NLPPlus/analyzers/parse-en-us/.github/workflows/dispatch-update-parse-en-us.yml +37 -0
NLPPlus/analyzers/parse-en-us/LICENSE +21 -0
NLPPlus/analyzers/parse-en-us/README.md +15 -0
NLPPlus/analyzers/parse-en-us/input/business.txt +37 -0
NLPPlus/analyzers/parse-en-us/input/doj.txt +7 -0
NLPPlus/analyzers/parse-en-us/input/test.txt +1 -0
NLPPlus/analyzers/parse-en-us/kb/user/all.dict +191543 -0
NLPPlus/analyzers/parse-en-us/kb/user/attr.kb +254 -0
NLPPlus/analyzers/parse-en-us/kb/user/hier.kb +774 -0
NLPPlus/analyzers/parse-en-us/kb/user/phr.kb +3 -0
NLPPlus/analyzers/parse-en-us/kb/user/word.kb +39 -0
NLPPlus/analyzers/parse-en-us/spec/CYCLE_0.nlp +14 -0
NLPPlus/analyzers/parse-en-us/spec/CYCLE_1.nlp +12 -0
NLPPlus/analyzers/parse-en-us/spec/CYCLE_2.nlp +14 -0
NLPPlus/analyzers/parse-en-us/spec/KBFuncs.nlp +304 -0
NLPPlus/analyzers/parse-en-us/spec/LINES_nosp.nlp +30 -0
NLPPlus/analyzers/parse-en-us/spec/Lines.nlp +21 -0
NLPPlus/analyzers/parse-en-us/spec/analyzer.seq +136 -0
NLPPlus/analyzers/parse-en-us/spec/anaphora100.nlp +160 -0
NLPPlus/analyzers/parse-en-us/spec/buff_out.nlp +116 -0
NLPPlus/analyzers/parse-en-us/spec/caps100.nlp +173 -0
NLPPlus/analyzers/parse-en-us/spec/caps50.nlp +654 -0
NLPPlus/analyzers/parse-en-us/spec/city.nlp +1247 -0
NLPPlus/analyzers/parse-en-us/spec/clause100.nlp +32 -0
NLPPlus/analyzers/parse-en-us/spec/clause200.nlp +24 -0
NLPPlus/analyzers/parse-en-us/spec/clause300.nlp +174 -0
NLPPlus/analyzers/parse-en-us/spec/clause400.nlp +105 -0
NLPPlus/analyzers/parse-en-us/spec/clause_out.nlp +36 -0
NLPPlus/analyzers/parse-en-us/spec/clause_pos.nlp +100 -0
NLPPlus/analyzers/parse-en-us/spec/clausesem.nlp +28 -0
NLPPlus/analyzers/parse-en-us/spec/common.nlp +2905 -0
NLPPlus/analyzers/parse-en-us/spec/country.nlp +79 -0
NLPPlus/analyzers/parse-en-us/spec/date.nlp +269 -0
NLPPlus/analyzers/parse-en-us/spec/dictfix.nlp +772 -0
NLPPlus/analyzers/parse-en-us/spec/displayKB.nlp +13 -0
NLPPlus/analyzers/parse-en-us/spec/doc.nlp +151 -0
NLPPlus/analyzers/parse-en-us/spec/dom100.nlp +154 -0
NLPPlus/analyzers/parse-en-us/spec/dom150.nlp +243 -0
NLPPlus/analyzers/parse-en-us/spec/dom175.nlp +592 -0
NLPPlus/analyzers/parse-en-us/spec/dom200.nlp +2810 -0
NLPPlus/analyzers/parse-en-us/spec/domfuns.nlp +170 -0
NLPPlus/analyzers/parse-en-us/spec/dqan100.nlp +442 -0
NLPPlus/analyzers/parse-en-us/spec/dqan200.nlp +181 -0
NLPPlus/analyzers/parse-en-us/spec/entity100.nlp +314 -0
NLPPlus/analyzers/parse-en-us/spec/feat.nlp +37 -0
NLPPlus/analyzers/parse-en-us/spec/feat100.nlp +248 -0
NLPPlus/analyzers/parse-en-us/spec/fin.nlp +59 -0
NLPPlus/analyzers/parse-en-us/spec/fnretok.nlp +187 -0
NLPPlus/analyzers/parse-en-us/spec/fnword.nlp +1411 -0
NLPPlus/analyzers/parse-en-us/spec/funs.nlp +5923 -0
NLPPlus/analyzers/parse-en-us/spec/hilite_alpha.nlp +22 -0
NLPPlus/analyzers/parse-en-us/spec/hilite_clause.nlp +42 -0
NLPPlus/analyzers/parse-en-us/spec/hilite_ne.nlp +30 -0
NLPPlus/analyzers/parse-en-us/spec/hilite_np.nlp +27 -0
NLPPlus/analyzers/parse-en-us/spec/hilite_pos.nlp +24 -0
NLPPlus/analyzers/parse-en-us/spec/hilite_postag.nlp +26 -0
NLPPlus/analyzers/parse-en-us/spec/hilite_spatial.nlp +35 -0
NLPPlus/analyzers/parse-en-us/spec/hilite_temporal.nlp +41 -0
NLPPlus/analyzers/parse-en-us/spec/hilite_undone.nlp +33 -0
NLPPlus/analyzers/parse-en-us/spec/inc100.nlp +917 -0
NLPPlus/analyzers/parse-en-us/spec/inc300.nlp +15 -0
NLPPlus/analyzers/parse-en-us/spec/incap100.nlp +165 -0
NLPPlus/analyzers/parse-en-us/spec/inclausesem.nlp +44 -0
NLPPlus/analyzers/parse-en-us/spec/ini.nlp +139 -0
NLPPlus/analyzers/parse-en-us/spec/jdfasd.nlp +18 -0
NLPPlus/analyzers/parse-en-us/spec/kb_geo.nlp +62 -0
NLPPlus/analyzers/parse-en-us/spec/kb_onto.nlp +35 -0
NLPPlus/analyzers/parse-en-us/spec/kbload.nlp +2597 -0
NLPPlus/analyzers/parse-en-us/spec/lex_unambig.nlp +67 -0
NLPPlus/analyzers/parse-en-us/spec/lex_unambig2.nlp +54 -0
NLPPlus/analyzers/parse-en-us/spec/location100.nlp +179 -0
NLPPlus/analyzers/parse-en-us/spec/lookup_word.nlp +84 -0
NLPPlus/analyzers/parse-en-us/spec/mhbv100.nlp +404 -0
NLPPlus/analyzers/parse-en-us/spec/ne_out.nlp +92 -0
NLPPlus/analyzers/parse-en-us/spec/ne_xml.nlp +85 -0
NLPPlus/analyzers/parse-en-us/spec/num.nlp +662 -0
NLPPlus/analyzers/parse-en-us/spec/oldsemfuns.nlp +1004 -0
NLPPlus/analyzers/parse-en-us/spec/phr100.nlp +420 -0
NLPPlus/analyzers/parse-en-us/spec/phr50.nlp +160 -0
NLPPlus/analyzers/parse-en-us/spec/pos10.nlp +168 -0
NLPPlus/analyzers/parse-en-us/spec/pos100.nlp +466 -0
NLPPlus/analyzers/parse-en-us/spec/pos200.nlp +694 -0
NLPPlus/analyzers/parse-en-us/spec/pos25.nlp +3559 -0
NLPPlus/analyzers/parse-en-us/spec/pos300.nlp +218 -0
NLPPlus/analyzers/parse-en-us/spec/pos400.nlp +236 -0
NLPPlus/analyzers/parse-en-us/spec/pos50.nlp +17226 -0
NLPPlus/analyzers/parse-en-us/spec/pos75.nlp +294 -0
NLPPlus/analyzers/parse-en-us/spec/pos_out.nlp +134 -0
NLPPlus/analyzers/parse-en-us/spec/pos_out_noscore - Copy.nlp +210 -0
NLPPlus/analyzers/parse-en-us/spec/pos_out_noscore.nlp +266 -0
NLPPlus/analyzers/parse-en-us/spec/poserr.nlp +22 -0
NLPPlus/analyzers/parse-en-us/spec/posfuns.nlp +2501 -0
NLPPlus/analyzers/parse-en-us/spec/pre_100.nlp +38 -0
NLPPlus/analyzers/parse-en-us/spec/pre_200.nlp +47 -0
NLPPlus/analyzers/parse-en-us/spec/pre_300.nlp +35 -0
NLPPlus/analyzers/parse-en-us/spec/pre_400.nlp +39 -0
NLPPlus/analyzers/parse-en-us/spec/pre_500.nlp +43 -0
NLPPlus/analyzers/parse-en-us/spec/pre_600.nlp +21 -0
NLPPlus/analyzers/parse-en-us/spec/pre_badeos.nlp +33 -0
NLPPlus/analyzers/parse-en-us/spec/pre_form.nlp +19 -0
NLPPlus/analyzers/parse-en-us/spec/pre_zap.nlp +28 -0
NLPPlus/analyzers/parse-en-us/spec/pre_zap1.nlp +21 -0
NLPPlus/analyzers/parse-en-us/spec/pre_zap2.nlp +29 -0
NLPPlus/analyzers/parse-en-us/spec/punct100.nlp +416 -0
NLPPlus/analyzers/parse-en-us/spec/punct200.nlp +52 -0
NLPPlus/analyzers/parse-en-us/spec/qclause100.nlp +495 -0
NLPPlus/analyzers/parse-en-us/spec/qclause200.nlp +57 -0
NLPPlus/analyzers/parse-en-us/spec/qclause300.nlp +39 -0
NLPPlus/analyzers/parse-en-us/spec/qclause50.nlp +465 -0
NLPPlus/analyzers/parse-en-us/spec/qclause75.nlp +78 -0
NLPPlus/analyzers/parse-en-us/spec/qconj100.nlp +78 -0
NLPPlus/analyzers/parse-en-us/spec/qconj200.nlp +92 -0
NLPPlus/analyzers/parse-en-us/spec/qline100.nlp +49 -0
NLPPlus/analyzers/parse-en-us/spec/qseg100.nlp +134 -0
NLPPlus/analyzers/parse-en-us/spec/qseg125.nlp +40 -0
NLPPlus/analyzers/parse-en-us/spec/qseg150.nlp +115 -0
NLPPlus/analyzers/parse-en-us/spec/qseg200.nlp +126 -0
NLPPlus/analyzers/parse-en-us/spec/qsemfuns.nlp +63 -0
NLPPlus/analyzers/parse-en-us/spec/qsent100.nlp +178 -0
NLPPlus/analyzers/parse-en-us/spec/qsent50.nlp +522 -0
NLPPlus/analyzers/parse-en-us/spec/qsent75.nlp +157 -0
NLPPlus/analyzers/parse-en-us/spec/quotes100.nlp +61 -0
NLPPlus/analyzers/parse-en-us/spec/rule_out.nlp +161 -0
NLPPlus/analyzers/parse-en-us/spec/seg100.nlp +335 -0
NLPPlus/analyzers/parse-en-us/spec/semfuns.nlp +2104 -0
NLPPlus/analyzers/parse-en-us/spec/sent100.nlp +44 -0
NLPPlus/analyzers/parse-en-us/spec/sent125.nlp +23 -0
NLPPlus/analyzers/parse-en-us/spec/sent200.nlp +97 -0
NLPPlus/analyzers/parse-en-us/spec/sent250.nlp +54 -0
NLPPlus/analyzers/parse-en-us/spec/sent300.nlp +32 -0
NLPPlus/analyzers/parse-en-us/spec/sent400.nlp +525 -0
NLPPlus/analyzers/parse-en-us/spec/sent50.nlp +24 -0
NLPPlus/analyzers/parse-en-us/spec/sent_pos.nlp +105 -0
NLPPlus/analyzers/parse-en-us/spec/sentsem.nlp +27 -0
NLPPlus/analyzers/parse-en-us/spec/study100.nlp +54 -0
NLPPlus/analyzers/parse-en-us/spec/tag100.nlp +100 -0
NLPPlus/analyzers/parse-en-us/spec/tag50.nlp +28 -0
NLPPlus/analyzers/parse-en-us/spec/tags100.nlp +60 -0
NLPPlus/analyzers/parse-en-us/spec/textzone100.nlp +40 -0
NLPPlus/analyzers/parse-en-us/spec/textzone50.nlp +24 -0
NLPPlus/analyzers/parse-en-us/spec/tmp.seq +134 -0
NLPPlus/analyzers/parse-en-us/spec/tok10.nlp +86 -0
NLPPlus/analyzers/parse-en-us/spec/tok100.nlp +451 -0
NLPPlus/analyzers/parse-en-us/spec/tok150.nlp +61 -0
NLPPlus/analyzers/parse-en-us/spec/tok175.nlp +637 -0
NLPPlus/analyzers/parse-en-us/spec/tok200.nlp +154 -0
NLPPlus/analyzers/parse-en-us/spec/tok25.nlp +24 -0
NLPPlus/analyzers/parse-en-us/spec/tok50.nlp +645 -0
NLPPlus/analyzers/parse-en-us/spec/trav_dict.nlp +19 -0
NLPPlus/analyzers/parse-en-us/spec/xmlfns.nlp +69 -0
NLPPlus/analyzers/parse-en-us/spec/xmlrecursive.nlp +283 -0
NLPPlus/analyzers/parse-en-us/spec/zap100.nlp +39 -0
NLPPlus/analyzers/parse-en-us/spec/zaplines.nlp +21 -0
NLPPlus/analyzers/parse-en-us/spec/zapwhite.nlp +20 -0
NLPPlus/analyzers/parse-en-us/tmp/README.md +1 -0
NLPPlus/analyzers/telephone/README.md +65 -0
NLPPlus/analyzers/telephone/input/phone_variations.txt +262 -0
NLPPlus/analyzers/telephone/input/telephone-texts/01-acme-contact.html +19 -0
NLPPlus/analyzers/telephone/input/telephone-texts/02-store-locator.html +16 -0
NLPPlus/analyzers/telephone/input/telephone-texts/03-restaurant.html +12 -0
NLPPlus/analyzers/telephone/input/telephone-texts/04-clinic.html +13 -0
NLPPlus/analyzers/telephone/input/telephone-texts/05-realestate.html +19 -0
NLPPlus/analyzers/telephone/input/telephone-texts/06-support.html +16 -0
NLPPlus/analyzers/telephone/input/telephone-texts/07-law-firm.html +13 -0
NLPPlus/analyzers/telephone/input/telephone-texts/08-school.html +13 -0
NLPPlus/analyzers/telephone/input/telephone-texts/09-hotel.html +13 -0
NLPPlus/analyzers/telephone/input/telephone-texts/10-newsroom.html +14 -0
NLPPlus/analyzers/telephone/input/telephone-texts/11-readme.md +20 -0
NLPPlus/analyzers/telephone/input/telephone-texts/12-meeting-notes.md +16 -0
NLPPlus/analyzers/telephone/input/telephone-texts/13-product-docs.md +18 -0
NLPPlus/analyzers/telephone/input/telephone-texts/14-changelog.md +17 -0
NLPPlus/analyzers/telephone/input/telephone-texts/15-faq.md +19 -0
NLPPlus/analyzers/telephone/input/telephone-texts/16-press-release.md +17 -0
NLPPlus/analyzers/telephone/input/telephone-texts/17-tutorial.md +20 -0
NLPPlus/analyzers/telephone/input/telephone-texts/18-directory.md +21 -0
NLPPlus/analyzers/telephone/input/telephone-texts/19-onboarding.md +19 -0
NLPPlus/analyzers/telephone/input/telephone-texts/20-event.md +19 -0
NLPPlus/analyzers/telephone/input/telephone-texts/21-signature.txt +9 -0
NLPPlus/analyzers/telephone/input/telephone-texts/22-phone-list.txt +18 -0
NLPPlus/analyzers/telephone/input/telephone-texts/23-support-ticket.txt +13 -0
NLPPlus/analyzers/telephone/input/telephone-texts/24-classifieds.txt +14 -0
NLPPlus/analyzers/telephone/input/telephone-texts/25-meeting-minutes.txt +13 -0
NLPPlus/analyzers/telephone/input/telephone-texts/26-reviews.txt +14 -0
NLPPlus/analyzers/telephone/input/telephone-texts/27-itinerary.txt +13 -0
NLPPlus/analyzers/telephone/input/telephone-texts/28-bug-report.txt +17 -0
NLPPlus/analyzers/telephone/input/telephone-texts/29-utility.txt +12 -0
NLPPlus/analyzers/telephone/input/telephone-texts/30-mixed-noise.txt +18 -0
NLPPlus/analyzers/telephone/input/text.txt +27 -0
NLPPlus/analyzers/telephone/kb/user/attr.kb +254 -0
NLPPlus/analyzers/telephone/kb/user/hier.kb +756 -0
NLPPlus/analyzers/telephone/kb/user/phr.kb +3 -0
NLPPlus/analyzers/telephone/kb/user/tel-country-codes.kbb +206 -0
NLPPlus/analyzers/telephone/kb/user/word.kb +57 -0
NLPPlus/analyzers/telephone/spec/KBFuncs.nlp +694 -0
NLPPlus/analyzers/telephone/spec/Lines.nlp +21 -0
NLPPlus/analyzers/telephone/spec/Telep0.nlp +69 -0
NLPPlus/analyzers/telephone/spec/Telep1.nlp +180 -0
NLPPlus/analyzers/telephone/spec/Telep1b.nlp +31 -0
NLPPlus/analyzers/telephone/spec/Telep2.nlp +18 -0
NLPPlus/analyzers/telephone/spec/TelepIntl.nlp +45 -0
NLPPlus/analyzers/telephone/spec/TelepIntlCat.nlp +55 -0
NLPPlus/analyzers/telephone/spec/TelephoneZone.nlp +36 -0
NLPPlus/analyzers/telephone/spec/analyzer.seq +13 -0
NLPPlus/analyzers/telephone/spec/funcs.nlp +121 -0
NLPPlus/analyzers/telephone/spec/kbdisp01.nlp +13 -0
NLPPlus/analyzers/telephone/spec/kbinit.nlp +16 -0
NLPPlus/analyzers/telephone/spec/output.nlp +13 -0
NLPPlus/analyzers/telephone/spec/tele1.nlp +15 -0
NLPPlus/analyzers/telephone/tmp/README.md +1 -0
NLPPlus/bindings.cp313-win_amd64.pyd +0 -0
NLPPlus/cloud.py +482 -0
NLPPlus/data/rfb/spec/Copy of decl.nlp +15 -0
NLPPlus/data/rfb/spec/actions.nlp +44 -0
NLPPlus/data/rfb/spec/analyzer.seq +37 -0
NLPPlus/data/rfb/spec/bigtok.nlp +132 -0
NLPPlus/data/rfb/spec/checks.nlp +17 -0
NLPPlus/data/rfb/spec/code.nlp +36 -0
NLPPlus/data/rfb/spec/components.nlp +60 -0
NLPPlus/data/rfb/spec/decl.nlp +36 -0
NLPPlus/data/rfb/spec/decls.nlp +37 -0
NLPPlus/data/rfb/spec/element.nlp +19 -0
NLPPlus/data/rfb/spec/finalerr.nlp +22 -0
NLPPlus/data/rfb/spec/gram1.nlp +50 -0
NLPPlus/data/rfb/spec/gram2-saf.nlp +250 -0
NLPPlus/data/rfb/spec/gram2.nlp +484 -0
NLPPlus/data/rfb/spec/gram3.nlp +11 -0
NLPPlus/data/rfb/spec/gram4.nlp +51 -0
NLPPlus/data/rfb/spec/gram5.nlp +99 -0
NLPPlus/data/rfb/spec/list.nlp +30 -0
NLPPlus/data/rfb/spec/list1.nlp +44 -0
NLPPlus/data/rfb/spec/multi.nlp +15 -0
NLPPlus/data/rfb/spec/nlppp.nlp +129 -0
NLPPlus/data/rfb/spec/nodes.nlp +15 -0
NLPPlus/data/rfb/spec/pair.nlp +41 -0
NLPPlus/data/rfb/spec/pairs.nlp +28 -0
NLPPlus/data/rfb/spec/path.nlp +15 -0
NLPPlus/data/rfb/spec/posts.nlp +16 -0
NLPPlus/data/rfb/spec/preaction.nlp +16 -0
NLPPlus/data/rfb/spec/pres.nlp +17 -0
NLPPlus/data/rfb/spec/recurse.nlp +15 -0
NLPPlus/data/rfb/spec/recurses.nlp +15 -0
NLPPlus/data/rfb/spec/region.nlp +16 -0
NLPPlus/data/rfb/spec/regions.nlp +15 -0
NLPPlus/data/rfb/spec/retok.nlp +78 -0
NLPPlus/data/rfb/spec/rule.nlp +80 -0
NLPPlus/data/rfb/spec/rules.nlp +17 -0
NLPPlus/data/rfb/spec/rulesfile.nlp +29 -0
NLPPlus/data/rfb/spec/select.nlp +17 -0
NLPPlus/data/rfb/spec/tmp.nlp +22 -0
NLPPlus/data/rfb/spec/tmp.seq +36 -0
NLPPlus/data/rfb/spec/tmp1.nlp +16 -0
NLPPlus/data/rfb/spec/un_mark.nlp +16 -0
NLPPlus/data/rfb/spec/x_commas.nlp +15 -0
NLPPlus/data/rfb/spec/x_white.nlp +15 -0
NLPPlus/data/rfb/spec/xvar.nlp +21 -0
nlpplus-2.0.22.dist-info/DELVEWHEEL +2 -0
nlpplus-2.0.22.dist-info/METADATA +476 -0
nlpplus-2.0.22.dist-info/RECORD +465 -0
nlpplus-2.0.22.dist-info/WHEEL +5 -0
nlpplus-2.0.22.dist-info/licenses/LICENSE +21 -0
nlpplus.libs/icudt78-ef378328df9c3fcff89cc09bc1ae7038.dll +0 -0
nlpplus.libs/icuuc78-a9fab54320205de057cb60aeb15f5668.dll +0 -0
nlpplus.libs/msvcp140-a4c2229bdc2a2a630acdc095b4d86008.dll +0 -0

NLPPlus/__init__.py ADDED Viewed

@@ -0,0 +1,414 @@
+"""Python extension for NLP++ text analysis engine.
+Basic usage:
+    import NLPPlus
+    xml = NLPPlus.analyze("This is some text to be parsed")
+    print(xml)
+"""
+# start delvewheel patch
+def _delvewheel_patch_1_13_0():
+    # Register the ``nlpplus.libs`` directory that sits next to this package
+    # directory as a DLL search location, but only if it actually exists.
+    import os
+    if os.path.isdir(libs_dir := os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'nlpplus.libs'))):
+        os.add_dll_directory(libs_dir)
+_delvewheel_patch_1_13_0()
+# Drop the helper so it does not linger in the package namespace.
+del _delvewheel_patch_1_13_0
+# end delvewheel patch
+import json
+import logging
+from shutil import copytree, rmtree
+from tempfile import TemporaryDirectory
+from os import PathLike, getcwd
+from pathlib import Path
+from typing import Optional, Any
+import os
+import glob
+from .bindings import NLP_ENGINE  # type: ignore
+LOGGER = logging.getLogger("NLPPlus")
+def maybe_readfile(path: Path) -> Optional[str]:
+    """Return the text of ``path``, or ``None`` if the file does not exist.
+    The file is decoded as UTF-8, the same encoding
+    :meth:`Engine.input_text` uses, instead of the platform's locale
+    encoding (a legacy code page on many Windows systems, which would
+    mis-decode or reject non-ASCII analyzer output).
+    NOTE(review): assumes the engine writes its output files as UTF-8.
+    Args:
+      path: file to read.
+    Returns:
+      The whole file as a string, or ``None`` when ``path`` is missing.
+    """
+    if not path.exists():
+        return None
+    with open(path, "rt", encoding="utf-8") as infh:
+        return infh.read()
+class EngineException(Exception):
+    """Raised when a working folder or analyzer input file is not usable.
+    Derives from ``Exception`` rather than ``BaseException`` so that
+    ordinary ``except Exception`` handlers catch it; ``BaseException`` is
+    reserved for interpreter-exit signals such as ``KeyboardInterrupt``
+    and ``SystemExit``.  ``except EngineException`` and
+    ``except BaseException`` callers are unaffected.
+    """
+class Results:
+    """Outputs of a single NLP++ analyzer run.
+    Keeps the text returned by the engine and reads the files left in the
+    run's output directory on demand, each time a property is accessed.
+    """
+    def __init__(self, outtext: str, outdir: PathLike):
+        LOGGER.info("Reading output from %s", outdir)
+        self.outdir = Path(outdir)
+        self.output_text = outtext
+    @property
+    def final_tree(self) -> Optional[str]:
+        """Contents of ``final.tree``, or ``None`` if it was not written."""
+        tree_file = self.outdir / "final.tree"
+        return maybe_readfile(tree_file)
+    @property
+    def output_json(self) -> Optional[str]:
+        """Raw text of ``output.json``, or ``None`` if it was not written."""
+        json_file = self.outdir / "output.json"
+        return maybe_readfile(json_file)
+    @property
+    def output(self) -> Optional[Any]:
+        """``output.json`` parsed into Python objects, or ``None`` if absent."""
+        raw = self.output_json
+        return None if raw is None else json.loads(raw)
+class Engine:
+    """NLP++ Engine for a given working folder.
+    Args:
+      working_folder(optional, PathLike): Working folder for this
+           instance.  If None, a temporary directory will be created
+           and initialized with the default analyzers.  Otherwise,
+           this must contain an `analyzers` and a `data` folder,
+           unless `initialize` is `True`.
+      verbose(optional, bool): Be more verbose.
+      initialize(optional, bool): Initialize `working_folder` with
+           the default analyzers.
+    """
+    def __init__(
+        self,
+        working_folder: Optional[PathLike] = None,
+        analyzer_path: Optional[str] = None,
+        verbose: bool = False,
+        initialize: bool = False,
+    ):
+        """Set up the working folder and start the underlying engine.
+        Args:
+          working_folder: folder holding `analyzers` and `data`; if None a
+              temporary folder is created and always initialized.
+          analyzer_path: optional external analyzers folder, equivalent to
+              calling :meth:`set_analyzers_folder` after construction.
+              When None, analyzers are looked up under
+              ``<working_folder>/analyzers``.
+          verbose: if True, the engine is not started in silent mode.
+          initialize: copy the packaged analyzers and data into
+              `working_folder` first.
+        Raises:
+          EngineException: the working folder lacks an `analyzers` or
+              `data` directory.
+        """
+        self._closed = False
+        if working_folder is None:
+            # ignore_cleanup_errors=True is now defense-in-depth: close()
+            # / __exit__ / __del__ explicitly call self.engine.close()
+            # before TemporaryDirectory.cleanup, which closes the engine's
+            # cgerr.log handle and lets Windows delete the temp dir
+            # cleanly. The flag still catches the corner case of an
+            # interpreter shutdown where __del__ never runs (e.g. crash,
+            # os._exit) — the OS reclaims the tempdir at process exit
+            # regardless, so swallowing the cleanup error keeps stderr
+            # quiet for users who never explicitly close. Engine-side
+            # fix shipped in NLP-ENGINE-523 (engine v3.1.55+).
+            self.tmpdir = TemporaryDirectory(
+                prefix="NLPPlus-", ignore_cleanup_errors=True
+            )
+            self.working_folder = Path(self.tmpdir.name)
+            initialize = True
+        else:
+            self.tmpdir = None
+            self.working_folder = Path(working_folder)
+        # Honor the constructor argument; it used to be accepted and then
+        # silently replaced with None.
+        self.analyzer_path = analyzer_path
+        if initialize:
+            copytree(
+                Path(__file__).parent / "analyzers", self.working_folder / "analyzers"
+            )
+            copytree(Path(__file__).parent / "data", self.working_folder / "data")
+            LOGGER.info("Initialized working folder in %s", self.working_folder)
+        if not (self.working_folder / "analyzers").is_dir():
+            raise EngineException(
+                f"analyzers directory not found in folder '{working_folder}'"
+            )
+        if not (self.working_folder / "data").is_dir():
+            raise EngineException(
+                f"data directory not found in folder '{working_folder}'"
+            )
+        self.engine = NLP_ENGINE(str(self.working_folder), silent=not verbose)
+    def close(self):
+        """Tear down the underlying engine and release the working folder.
+        Idempotent: safe to call multiple times. After ``close()``, any
+        call to :meth:`analyze`, :meth:`compile`, or :meth:`cloud_compile`
+        is undefined behavior — create a new ``Engine`` instead.
+        On Windows in particular, calling ``close()`` (or using ``Engine``
+        as a context manager) is what makes the auto-created
+        ``TemporaryDirectory`` working folder delete cleanly: the engine
+        keeps a file handle on ``<workfolder>/logs/cgerr.log`` open for
+        the lifetime of the C++ instance, and Windows refuses to delete
+        a directory that contains an open file. ``close()`` calls into
+        the C++ engine's ``close()`` (NLP-ENGINE-523, engine v3.1.55+),
+        which releases that handle before the tempdir is removed.
+        """
+        if self._closed:
+            return
+        # Flag the instance as closed before tearing anything down, so a
+        # failure part-way through is not retried by a later close().
+        self._closed = True
+        # Engine.close() is idempotent on the C++ side as of engine
+        # v3.1.55; older engines just no-op the second teardown.
+        try:
+            self.engine.close()
+        except AttributeError:
+            # Pre-3.1.55 binding without the close() method exposed;
+            # fall back to letting __del__ tear it down. The tempdir
+            # cleanup below will still hit the PermissionError on
+            # Windows in that case — same situation as before 2.0.4.
+            pass
+        # tmpdir is only set when this instance created its own temporary
+        # working folder; a caller-supplied folder is left in place.
+        if self.tmpdir is not None:
+            self.tmpdir.cleanup()
+            self.tmpdir = None
+    def __enter__(self):
+        """Enter a ``with`` block; the engine itself is the managed object."""
+        return self
+    def __exit__(self, exc_type, exc_value, traceback):
+        """Close the engine when the ``with`` block ends.
+        Returns ``False`` so an exception raised inside the block is not
+        suppressed.
+        """
+        self.close()
+        return False
+    def __del__(self):
+        """Close the engine on finalization, ignoring any ``Exception``."""
+        # Best-effort: __del__ may run during interpreter shutdown when
+        # modules are being torn down and the bindings module may already
+        # be gone. Swallow anything that goes wrong here; the explicit
+        # close() / context-manager paths are the supported way to get
+        # deterministic cleanup.
+        try:
+            self.close()
+        except Exception:
+            pass
+    def analyze(self, text: str, analyzer_name: str, develop: bool = False,
+                compiled: bool = False) -> Results:
+        """Analyze text with the named analyzer.
+        Args:
+          text: input text to analyze.
+          analyzer_name: name of the analyzer under the working folder
+                   (or under the analyzers folder, if one has been set).
+          develop: if True, the engine emits intermediate log/tree files
+                   into the analyzer's `_log` directory.
+          compiled: if True, the engine loads the analyzer's compiled
+                    shared libraries (``bin/run.<ext>`` for the analyzer
+                    body and ``bin/kb.<ext>`` for the compiled KB)
+                    instead of running interpreted from the ``.nlp``
+                    source.  See :meth:`compile` to produce the
+                    generated C++ sources for those libraries, and the
+                    package README for the cmake / cloud build step
+                    that turns them into the actual ``.so``/``.dylib``/
+                    ``.dll`` files.
+        Returns:
+          A :class:`Results` holding the engine's output text and the
+          analyzer's ``output`` directory.
+        """
+        if self.analyzer_path:
+            # In an external analyzers folder each analyzer sits directly
+            # at ``<analyzer_path>/<name>`` (the layout input_text() and
+            # copy_library_analyzers() use), so its output directory is
+            # derived from that same path.  Prefixing "analyzers" and the
+            # folder a second time only resolved correctly when
+            # analyzer_path was absolute.
+            # NOTE(review): a relative analyzer_path is resolved here
+            # against the current directory; confirm the engine agrees.
+            analyzer_ref = Path(self.analyzer_path) / analyzer_name
+            outdir = analyzer_ref / "output"
+        else:
+            analyzer_ref = Path(analyzer_name)
+            outdir = self.working_folder / "analyzers" / analyzer_ref / "output"
+        # Clear stale files from a previous run so Results cannot pick them
+        # up.  Sub-directories are skipped: os.remove() raises on them.
+        for stale in glob.glob(str(outdir / "*")):
+            if os.path.isfile(stale) or os.path.islink(stale):
+                os.remove(stale)
+        outtext = self.engine.analyze(str(analyzer_ref), text, develop,
+                                      compiled)
+        return Results(outtext, outdir)
+    def compile(self, analyzer_name: str, develop: bool = False,
+                kb_only: bool = False, analyzer_only: bool = False) -> Path:
+        """Generate C++ source files for the named analyzer.
+        Runs the engine in ``-COMPILE`` mode, or ``-COMPILEKB`` if
+        ``kb_only=True`` (KB only), or ``-COMPILEANA`` if
+        ``analyzer_only=True`` (analyzer rules only, skipping the KB).
+        ``-COMPILE`` emits the analyzer body under ``<analyzer>/run/``
+        and the knowledge base under ``<analyzer>/kb/``; ``-COMPILEKB``
+        emits just ``<analyzer>/kb/``; ``-COMPILEANA`` emits just
+        ``<analyzer>/run/``.  Returns the analyzer directory containing
+        those generated trees.
+        Use ``analyzer_only=True`` when only the rules changed and the
+        KB is already compiled.  ``kb_only`` and ``analyzer_only`` are
+        mutually exclusive.
+        The generated C++ still needs to be built into shared
+        libraries before :meth:`analyze` can load them with
+        ``compiled=True``.  Use :meth:`cloud_compile` to do the build
+        step via the public nlp-compile-service in one call.
+        Raises:
+          ValueError: both ``kb_only`` and ``analyzer_only`` are True.
+        """
+        if kb_only and analyzer_only:
+            raise ValueError("compile: kb_only and analyzer_only are mutually exclusive")
+        analyzer_name_p = Path(analyzer_name)
+        if self.analyzer_path:
+            # The returned directory must be the one handed to the engine:
+            # an external analyzers folder holds each analyzer directly at
+            # ``<analyzer_path>/<name>``, with no extra "analyzers" level.
+            analyzer_dir = Path(self.analyzer_path) / analyzer_name_p
+            engine_arg = str(analyzer_dir)
+        else:
+            analyzer_dir = (
+                self.working_folder / "analyzers" / analyzer_name_p
+            )
+            engine_arg = str(analyzer_name_p)
+        self.engine.compile(engine_arg, develop, kb_only, analyzer_only)
+        return analyzer_dir
+    def cloud_compile(self, analyzer_name: str,
+                      dispatcher_url: Optional[str] = None,
+                      kb_only: bool = False,
+                      analyzer_only: bool = False,
+                      develop: bool = False,
+                      poll_interval: float = 2.0,
+                      timeout: float = 30 * 60,
+                      skip_local_compile: bool = False) -> Path:
+        """Compile an analyzer end to end: codegen, cloud build, stage.
+        Unless ``skip_local_compile=True``, :meth:`compile` first produces
+        the analyzer's ``run/`` + ``kb/`` C++ trees.  Those trees, plus an
+        auto-generated ``StdAfx.h`` stub, are packaged into a tarball and
+        submitted to the public nlp-compile-service dispatcher; the
+        GitHub-Actions runner build is polled until it completes, and the
+        resulting shared library is downloaded and staged into
+        ``<analyzer>/bin/`` as ``run.<ext>`` and ``kb.<ext>`` (and the
+        Windows ``runu.<ext>`` / ``kbu.<ext>`` variants).  Afterwards
+        :meth:`analyze` with ``compiled=True`` loads the compiled
+        artifact.
+        Args:
+          analyzer_name: analyzer under the engine's working folder.
+          dispatcher_url: dispatcher endpoint to use; a falsy value
+            selects ``cloud.DEFAULT_DISPATCHER_URL``.
+          kb_only: compile only the KB.
+          analyzer_only: compile only the analyzer rules (skip the KB).
+            Mutually exclusive with ``kb_only``.
+          develop: forwarded to local ``-COMPILE``.
+          poll_interval: seconds between job-status checks.
+          timeout: max seconds to wait for the runner build.
+          skip_local_compile: if True, assume ``run/`` and ``kb/``
+            already exist under the analyzer dir.
+        Returns:
+          The ``bin/`` directory path.
+        """
+        # Deferred import: the rest of the package must keep working in
+        # environments that don't have an `urllib`-friendly TLS stack.
+        from . import cloud
+        target_url = (
+            dispatcher_url if dispatcher_url else cloud.DEFAULT_DISPATCHER_URL
+        )
+        build_options = dict(
+            dispatcher_url=target_url,
+            kb_only=kb_only,
+            analyzer_only=analyzer_only,
+            develop=develop,
+            poll_interval=poll_interval,
+            timeout=timeout,
+            skip_local_compile=skip_local_compile,
+        )
+        return cloud.cloud_compile(self, analyzer_name, **build_options)
+    def input_text(self, analyzer_name: str, file_name: str) -> str:
+        """Return the text of a file in an analyzer's ``input`` directory.
+        The analyzer is looked up in the analyzers folder set through
+        :meth:`set_analyzers_folder` / :meth:`copy_library_analyzers`, or,
+        when none has been set, in ``<working_folder>/analyzers`` (the
+        same fallback :meth:`analyze` uses).  Without the fallback an
+        unset analyzers folder raised ``TypeError`` from ``Path(None)``.
+        Args:
+          analyzer_name: analyzer whose ``input`` directory is read.
+          file_name: file name inside that directory.
+        Returns:
+          The file contents, decoded as UTF-8.
+        Raises:
+          EngineException: the file does not exist.
+        """
+        if self.analyzer_path:
+            analyzers_root = Path(self.analyzer_path)
+        else:
+            analyzers_root = self.working_folder / "analyzers"
+        file_path = analyzers_root / analyzer_name / "input" / file_name
+        if not file_path.is_file():
+            raise EngineException(
+                f"File not found in input directory '{file_path}'"
+            )
+        with open(file_path, "rt", encoding="utf-8") as file:
+            text = file.read()
+        return text
+    def set_analyzers_folder(self, analyzer_name: str):
+        """Set analyzers directory path.
+        Despite its name, ``analyzer_name`` is stored as the analyzers
+        folder (``self.analyzer_path``), not as an analyzer's name.
+        """
+        self.analyzer_path = analyzer_name
+    def copy_library_analyzers(self, to_dir: str, overwrite: bool=True):
+        """Copy the packaged analyzers into ``to_dir`` and select that folder.
+        An existing ``to_dir`` is deleted and re-copied when ``overwrite``
+        is true and left untouched otherwise.  In every case ``to_dir``
+        becomes this engine's analyzers folder.
+        """
+        already_there = os.path.exists(to_dir)
+        if already_there and overwrite:
+            rmtree(to_dir)
+        if overwrite or not already_there:
+            packaged = Path(__file__).parent / "analyzers"
+            copytree(packaged, Path(to_dir))
+        self.analyzer_path = str(to_dir)
+# Default module-level engine used by the convenience functions below.  With
+# no working folder given, Engine() creates a temporary one and copies the
+# packaged analyzers and data into it, so this runs at import time.
+engine = Engine()
+def set_working_folder(working_folder: Optional[str] = None, initialize: bool = False):
+    """Replace the module-level engine with one bound to another folder.
+    Args:
+      working_folder(str): Folder the new engine works in; `None` selects
+                           the current working directory.
+      initialize(bool): Copy the built-in analyzers and data into the
+                        folder first.  (Optional, default=False)
+    """
+    global engine
+    target = Path(getcwd() if working_folder is None else working_folder)
+    engine = Engine(target, initialize=initialize)
+def copy_library_analyzers(analyzer_folder_path: str, overwrite=True):
+    """Copy the packaged library analyzers into ``analyzer_folder_path``.
+    Delegates to :meth:`Engine.copy_library_analyzers` on the module-level
+    engine, which also makes that folder the engine's analyzers folder.
+    """
+    engine.copy_library_analyzers(analyzer_folder_path, overwrite)
+def set_analyzers_folder(analyzer_folder_path: str):
+    """Set the analyzers folder used by the module-level engine.
+    Delegates to :meth:`Engine.set_analyzers_folder`.
+    """
+    engine.set_analyzers_folder(analyzer_folder_path)
+def analyze(text: str, parser: str = "parse-en-us", develop: bool = False,
+            compiled: bool = False) -> str:
+    """Analyze ``text`` with the named analyzer and return its output text.
+    With ``compiled=True`` the engine runs the analyzer from its compiled
+    shared libraries (``bin/run.<ext>`` and ``bin/kb.<ext>``) rather than
+    interpreted.  See :func:`compile` for producing those.
+    """
+    results = engine.analyze(text, parser, develop=develop, compiled=compiled)
+    return results.output_text
+def compile(analyzer: str = "parse-en-us", develop: bool = False,
+            kb_only: bool = False, analyzer_only: bool = False):
+    """Emit the C++ sources for the named analyzer.
+    Thin wrapper over :meth:`Engine.compile` on the module-level engine.
+    Output goes to ``<analyzer>/run/`` and ``<analyzer>/kb/`` inside the
+    engine's working folder (only ``kb/`` with ``kb_only``, only ``run/``
+    with ``analyzer_only``).  The sources must still be built into shared
+    libraries before :func:`analyze` can load them with
+    ``compiled=True``.
+    """
+    # This module-level name shadows the builtin ``compile`` within the
+    # package namespace; it is kept because it is part of the public API.
+    return engine.compile(
+        analyzer,
+        develop=develop,
+        kb_only=kb_only,
+        analyzer_only=analyzer_only,
+    )
+def cloud_compile(analyzer: str = "parse-en-us",
+                  dispatcher_url: Optional[str] = None,
+                  kb_only: bool = False,
+                  analyzer_only: bool = False,
+                  develop: bool = False,
+                  poll_interval: float = 2.0,
+                  timeout: float = 30 * 60,
+                  skip_local_compile: bool = False):
+    """Compile an analyzer end-to-end via the public nlp-compile-service.
+    Wraps :meth:`Engine.cloud_compile` — see that method for the full
+    docstring.  After this call returns, ``analyze(..., compiled=True)``
+    will pick up the staged shared libraries from the analyzer's
+    ``bin/`` directory.
+    All arguments are forwarded unchanged to the module-level engine, and
+    its return value is returned as-is.
+    """
+    return engine.cloud_compile(
+        analyzer, dispatcher_url=dispatcher_url, kb_only=kb_only,
+        analyzer_only=analyzer_only, develop=develop,
+        poll_interval=poll_interval, timeout=timeout,
+        skip_local_compile=skip_local_compile,
+    )
+def input_text(analyzer_name: str, file_name: str):
+    """Return the text from a file in the input directory.
+    Delegates to :meth:`Engine.input_text` on the module-level engine.
+    Args:
+      analyzer_name: analyzer whose ``input`` directory is read.
+      file_name: file name inside that directory.
+    """
+    # The method is ``input_text``; the previous ``intput_text`` spelling
+    # raised AttributeError on every call.
+    return engine.input_text(analyzer_name, file_name)

NLPPlus/_version.py ADDED Viewed

@@ -0,0 +1,24 @@
+# file generated by vcs-versioning
+# don't change, don't track in version control
+from __future__ import annotations
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
+version: str
+__version__: str
+__version_tuple__: tuple[int | str, ...]
+version_tuple: tuple[int | str, ...]
+commit_id: str | None
+__commit_id__: str | None
+__version__ = version = '2.0.22'
+__version_tuple__ = version_tuple = (2, 0, 22)
+__commit_id__ = commit_id = 'g756328b5f'

NLPPlus/analyzers/.github/workflows/dispatch-update-package-analyzers.yml ADDED Viewed

@@ -0,0 +1,40 @@
+# Install at: VisualText/package-analyzers -> .github/workflows/dispatch-update-package-analyzers.yml
+#
+# Sender in the cross-repo percolation chain (see
+# VisualText/nlp-engine/docs/PERCOLATION.md).
+#
+# When a release tag (v*) is pushed here, ping the repos that embed
+# package-analyzers as a submodule so they refresh their pointer. Both the npm
+# and Python nlpengine packages listen for the `package-analyzers-release` event
+# and open a bump PR (they publish to npm/PyPI with their own versions, so a
+# submodule bump must not auto-publish — a human merges the PR).
+#
+# Requires the shared `CLASSIC_PAT` secret in this repo (the default
+# GITHUB_TOKEN cannot trigger workflows in other repos).
+name: Dispatch update-package-analyzers
+on:
+  push:
+    tags:
+      - 'v*'
+  workflow_dispatch:
+jobs:
+  dispatch:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        repo:
+          - VisualText/npm-package-nlpengine   # embeds package-analyzers at path analyzers
+          - VisualText/py-package-nlpengine    # embeds package-analyzers at path NLPPlus/analyzers
+    steps:
+      - name: Trigger ${{ matrix.repo }}
+        uses: peter-evans/repository-dispatch@v2
+        with:
+          token: ${{ secrets.CLASSIC_PAT }}
+          repository: ${{ matrix.repo }}
+          event-type: package-analyzers-release
+          client-payload: '{"tag_name": "${{ github.ref_name }}"}'
+        continue-on-error: true

NLPPlus/analyzers/.github/workflows/tag-on-push.yml ADDED Viewed

@@ -0,0 +1,86 @@
+# Install at: VisualText/package-analyzers -> .github/workflows/tag-on-push.yml
+#
+# Source of the percolation chain for DIRECT analyzer edits (see
+# VisualText/nlp-engine/docs/PERCOLATION.md).
+#
+# When you edit an analyzer (address-parser, emailaddress, links, telephone) and
+# push to main, this auto-computes the next semver tag and pushes it. Pushing the
+# v* tag fires dispatch-update-package-analyzers.yml, which pings the npm and
+# Python packages to open a submodule-bump PR (a human merges it; see that file).
+#
+# parse-en-us is intentionally NOT in the path filter: parse-en-us changes arrive
+# via update-parse-en-us.yml (driven by parse-en-us-release), which tags itself.
+# Keeping the two paths disjoint avoids double-tagging.
+#
+# Requires the shared CLASSIC_PAT secret (a GITHUB_TOKEN-pushed tag cannot
+# trigger dispatch-update-package-analyzers.yml in turn).
+name: Tag on push
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'address-parser/**'
+      - 'emailaddress/**'
+      - 'links/**'
+      - 'telephone/**'
+  workflow_dispatch:
+    inputs:
+      bump:
+        description: "Semver component to bump"
+        required: true
+        default: "patch"
+        type: choice
+        options: [patch, minor, major]
+permissions:
+  contents: write
+jobs:
+  tag:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.CLASSIC_PAT }}
+      - name: Configure git identity
+        run: |
+          git config user.name  'github-actions[bot]'
+          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
+      - name: Compute next version
+        id: version
+        env:
+          # Hand the input to the script through the environment instead of
+          # expanding it into the script text, so its value is always treated
+          # as data and never as shell syntax.
+          BUMP: ${{ inputs.bump }}
+        run: |
+          set -euo pipefail
+          bump="${BUMP:-patch}"   # push events have no inputs
+          latest=$(git tag --sort=-v:refname \
+                   | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+$' \
+                   | head -n1 || true)
+          latest=${latest:-v0.0.0}
+          IFS='.' read -r major minor patch <<<"${latest#v}"
+          case "$bump" in
+            major) major=$((major + 1)); minor=0; patch=0 ;;
+            minor) minor=$((minor + 1)); patch=0 ;;
+            patch) patch=$((patch + 1)) ;;
+            # Fail here rather than re-tagging the existing version.
+            *) echo "Unknown bump component: $bump" >&2; exit 1 ;;
+          esac
+          new="v${major}.${minor}.${patch}"
+          echo "new=$new" >> "$GITHUB_OUTPUT"
+          echo "Tagging $latest -> $new"
+      - name: Tag and push
+        run: |
+          set -euo pipefail
+          git tag -a "${{ steps.version.outputs.new }}" -m "Release ${{ steps.version.outputs.new }}"
+          git push origin "${{ steps.version.outputs.new }}"
+      - name: Create GitHub release
+        uses: softprops/action-gh-release@v2
+        with:
+          tag_name: ${{ steps.version.outputs.new }}
+          name: ${{ steps.version.outputs.new }}
+          generate_release_notes: true
+          token: ${{ secrets.CLASSIC_PAT || secrets.GITHUB_TOKEN }}

NLPPlus/analyzers/.github/workflows/update-parse-en-us.yml ADDED Viewed

@@ -0,0 +1,130 @@
+# Install at: VisualText/package-analyzers -> .github/workflows/update-parse-en-us.yml
+#
+# Listener in the cross-repo percolation chain (see
+# VisualText/nlp-engine/docs/PERCOLATION.md).
+#
+# 1. Pulls the latest commit of the parse-en-us submodule's default branch (main).
+# 2. Computes a new semver tag by bumping the latest vX.Y.Z tag.
+# 3. Commits the submodule pointer change, tags, and pushes.
+# 4. Cuts a GitHub release at the new tag. Pushing the v* tag fires
+#    dispatch-update-package-analyzers.yml, which pings the npm and Python
+#    packages to refresh their analyzers submodule.
+#
+# Triggered by parse-en-us (event-type parse-en-us-release) on its release, or
+# by hand. A repository_dispatch ping carries no inputs, so the bump defaults to
+# patch.
+name: Update parse-en-us & Bump Version
+on:
+  workflow_dispatch:
+    inputs:
+      bump:
+        description: "Semver component to bump"
+        required: true
+        default: "patch"
+        type: choice
+        options:
+          - patch
+          - minor
+          - major
+      create_release:
+        description: "Create a GitHub release at the new tag"
+        required: true
+        default: true
+        type: boolean
+  repository_dispatch:
+    types: [parse-en-us-release]
+permissions:
+  contents: write
+jobs:
+  update-and-bump:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout (with submodules and full history)
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          submodules: recursive
+          token: ${{ secrets.CLASSIC_PAT || secrets.GITHUB_TOKEN }}
+      - name: Configure git identity
+        run: |
+          git config user.name  'github-actions[bot]'
+          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
+      - name: Update parse-en-us to latest main
+        id: submodule
+        run: |
+          set -euo pipefail
+          before=$(git -C parse-en-us rev-parse HEAD)
+          git -C parse-en-us fetch origin main
+          git -C parse-en-us checkout origin/main
+          git -C parse-en-us submodule update --init --recursive
+          after=$(git -C parse-en-us rev-parse HEAD)
+          echo "before=$before" >> "$GITHUB_OUTPUT"
+          echo "after=$after"   >> "$GITHUB_OUTPUT"
+          if [ "$before" = "$after" ]; then
+            echo "changed=false" >> "$GITHUB_OUTPUT"
+            echo "parse-en-us already at $after"
+          else
+            echo "changed=true" >> "$GITHUB_OUTPUT"
+            echo "parse-en-us: $before -> $after"
+          fi
+      - name: Compute next version
+        id: version
+        env:
+          # Hand the input to the script through the environment instead of
+          # expanding it into the script text, so its value is always treated
+          # as data and never as shell syntax.
+          BUMP: ${{ inputs.bump }}
+        run: |
+          set -euo pipefail
+          bump="${BUMP:-patch}"   # repository_dispatch has no inputs
+          latest=$(git tag --sort=-v:refname \
+                   | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+$' \
+                   | head -n1 || true)
+          latest=${latest:-v0.0.0}
+          IFS='.' read -r major minor patch <<<"${latest#v}"
+          case "$bump" in
+            major) major=$((major + 1)); minor=0; patch=0 ;;
+            minor) minor=$((minor + 1)); patch=0 ;;
+            patch) patch=$((patch + 1)) ;;
+            # Fail here rather than re-tagging the existing version.
+            *) echo "Unknown bump component: $bump" >&2; exit 1 ;;
+          esac
+          new="v${major}.${minor}.${patch}"
+          echo "previous=$latest" >> "$GITHUB_OUTPUT"
+          echo "new=$new"         >> "$GITHUB_OUTPUT"
+          echo "Bumping $latest -> $new"
+      - name: Commit submodule update
+        if: steps.submodule.outputs.changed == 'true'
+        run: |
+          git add parse-en-us
+          git commit -m "Update parse-en-us to ${{ steps.submodule.outputs.after }} (${{ steps.version.outputs.new }})"
+      - name: Tag and push
+        run: |
+          set -euo pipefail
+          git tag -a "${{ steps.version.outputs.new }}" \
+            -m "Release ${{ steps.version.outputs.new }}"
+          git push origin HEAD
+          git push origin "${{ steps.version.outputs.new }}"
+      - name: Create GitHub release
+        if: ${{ github.event_name != 'workflow_dispatch' || inputs.create_release }}
+        uses: softprops/action-gh-release@v2
+        with:
+          tag_name: ${{ steps.version.outputs.new }}
+          name: ${{ steps.version.outputs.new }}
+          generate_release_notes: true
+          token: ${{ secrets.CLASSIC_PAT || secrets.GITHUB_TOKEN }}
+      - name: Summary
+        run: |
+          {
+            echo "### Update parse-en-us & bump version"
+            echo ""
+            echo "- previous tag: \`${{ steps.version.outputs.previous }}\`"
+            echo "- new tag: \`${{ steps.version.outputs.new }}\`"
+            echo "- parse-en-us changed: \`${{ steps.submodule.outputs.changed }}\`"
+            echo "- parse-en-us: \`${{ steps.submodule.outputs.before }}\` → \`${{ steps.submodule.outputs.after }}\`"
+          } >> "$GITHUB_STEP_SUMMARY"