invoice2data 0.4.7__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (262)
  1. {invoice2data-0.4.7/src/invoice2data.egg-info → invoice2data-0.5.0}/PKG-INFO +5 -5
  2. {invoice2data-0.4.7 → invoice2data-0.5.0}/README.md +3 -2
  3. {invoice2data-0.4.7 → invoice2data-0.5.0}/pyproject.toml +3 -5
  4. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/__init__.py +1 -0
  5. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/__main__.py +64 -22
  6. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/invoice_template.py +11 -12
  7. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/loader.py +11 -14
  8. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/parsers/lines.py +29 -33
  9. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/parsers/regex.py +6 -9
  10. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/parsers/static.py +2 -4
  11. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/plugins/lines.py +2 -3
  12. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/plugins/tables.py +22 -24
  13. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/utils.py +1 -3
  14. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/input/gvision.py +2 -3
  15. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/input/ocrmypdf.py +7 -11
  16. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/input/pdfminer_wrapper.py +3 -5
  17. invoice2data-0.5.0/src/invoice2data/input/pdfplumber.py +46 -0
  18. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/input/pdftotext.py +3 -4
  19. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/input/tesseract.py +3 -6
  20. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/output/to_csv.py +2 -4
  21. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/output/to_json.py +3 -5
  22. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/output/to_xml.py +5 -7
  23. {invoice2data-0.4.7 → invoice2data-0.5.0/src/invoice2data.egg-info}/PKG-INFO +5 -5
  24. {invoice2data-0.4.7 → invoice2data-0.5.0}/tests/test_cli.py +4 -5
  25. {invoice2data-0.4.7 → invoice2data-0.5.0}/tests/test_extraction.py +1 -2
  26. {invoice2data-0.4.7 → invoice2data-0.5.0}/tests/test_invoice_template.py +10 -12
  27. {invoice2data-0.4.7 → invoice2data-0.5.0}/tests/test_lib.py +27 -5
  28. {invoice2data-0.4.7 → invoice2data-0.5.0}/tests/test_loader.py +1 -1
  29. invoice2data-0.4.7/src/invoice2data/input/pdfplumber.py +0 -66
  30. {invoice2data-0.4.7 → invoice2data-0.5.0}/LICENSE.md +0 -0
  31. {invoice2data-0.4.7 → invoice2data-0.5.0}/setup.cfg +0 -0
  32. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/__init__.py +0 -0
  33. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/parsers/__init__.py +0 -0
  34. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/parsers/__interface__.py +0 -0
  35. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/plugins/__init__.py +0 -0
  36. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/plugins/__interface__.py +0 -0
  37. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/au/au.com.opal.yml +0 -0
  38. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/au/au.com.telstra.yml +0 -0
  39. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/be/be.accor.invest.ibis.yml +0 -0
  40. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/be/be.accor.invest.novotel.yml +0 -0
  41. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/be/be.boucherie.pochet.yml +0 -0
  42. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/be/be.cebeo.yml +0 -0
  43. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/be/be.eg_retail.yml +0 -0
  44. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/be/be.lampiris.facture-dacompte.yml +0 -0
  45. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/be/be.lampiris.factuur.yml +0 -0
  46. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/be/be.lampiris.regularisation.yml +0 -0
  47. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/be/be.melchior-vins.yml +0 -0
  48. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/be/be.proximus.yml +0 -0
  49. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/be/be.scarlet.yml +0 -0
  50. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/be/be.securex.social.yml +0 -0
  51. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/ch/ch.pcengines.yml +0 -0
  52. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.AzureInterior.yml +0 -0
  53. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.amazon.aws.yml +0 -0
  54. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.apple.yml +0 -0
  55. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.apps4rent.yml +0 -0
  56. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.binarylife.yml +0 -0
  57. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.bloomberg.yml +0 -0
  58. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.cloudflare.yml +0 -0
  59. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.cloudns.yml +0 -0
  60. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.datadoghq.yml +0 -0
  61. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.digitalocean.yml +0 -0
  62. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.envato.yml +0 -0
  63. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.eur.aliexpress.json +0 -0
  64. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.expressvpn.yml +0 -0
  65. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.expressvpn_prio6.yml +0 -0
  66. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.flipkart.WSRetail.json +0 -0
  67. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.ftserussell.yml +0 -0
  68. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.github.yml +0 -0
  69. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.globalsign.yml +0 -0
  70. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.google.adwords.hk.yml +0 -0
  71. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.hetzner.yml +0 -0
  72. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.hobohost.yml +0 -0
  73. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.jamiepro.yml +0 -0
  74. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.linode.yml +0 -0
  75. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.microsoftonline.hk-v2017.yml +0 -0
  76. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.microsoftonline.hk.yml +0 -0
  77. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.mongodb.yml +0 -0
  78. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.namecheap.yml +0 -0
  79. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.namesilo.yml +0 -0
  80. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.newrelic.yml +0 -0
  81. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.nl.lenovo.digitalriver.yml +0 -0
  82. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.nmmn.yml +0 -0
  83. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.nodisto.yml +0 -0
  84. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.nyse.yml +0 -0
  85. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.oyo.invoice.yml +0 -0
  86. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.packtpub.yml +0 -0
  87. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.pixartprinting.yml +0 -0
  88. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.runbox.yml +0 -0
  89. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.sammymaystone.yml +0 -0
  90. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.scaleway.yml +0 -0
  91. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.textmaster.yml +0 -0
  92. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.tmx.yml +0 -0
  93. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.travis-ci.yml +0 -0
  94. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.twitter.de.yml +0 -0
  95. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.twitter.uk.yml +0 -0
  96. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.twitter.yml +0 -0
  97. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.upwork.yml +0 -0
  98. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.usersnap.yml +0 -0
  99. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/com/com.vultr.yml +0 -0
  100. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/de/de.amazon.yml +0 -0
  101. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/de/de.bettina-kast.yml +0 -0
  102. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/de/de.digikey.com.yml +0 -0
  103. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/de/de.hosteurope.yml +0 -0
  104. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/de/de.notebooksbilligerBillPay.yml +0 -0
  105. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/de/de.ovh.yml +0 -0
  106. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/de/de.qualityhosting.yml +0 -0
  107. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/de/de.united-domains.yml +0 -0
  108. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/es/com.mob-barcelona.caterina.yml +0 -0
  109. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/es/com.pepephone.yml +0 -0
  110. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/es/es.amazon.yml +0 -0
  111. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/es/es.digimobile.yml +0 -0
  112. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/es/es.supplies24.yml +0 -0
  113. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/co.mooncard.yml +0 -0
  114. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.adobe.ie.yml +0 -0
  115. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.akretion.fr.yml +0 -0
  116. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.amazon.aws.yml +0 -0
  117. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.ateliercopieservice.yml +0 -0
  118. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.chauffeur-prive.yml +0 -0
  119. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.coriolis.yml +0 -0
  120. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.easyjet.fr.yml +0 -0
  121. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.eaudugrandlyon.yml +0 -0
  122. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.godaddy.yml +0 -0
  123. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.google.ie.yml +0 -0
  124. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.hootsuite.yml +0 -0
  125. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.jeanbesson.yml +0 -0
  126. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.ldlc.yml +0 -0
  127. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.linkedin.yml +0 -0
  128. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.mention.yml +0 -0
  129. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.microsoft.ie.yml +0 -0
  130. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.myflyingbox.yml +0 -0
  131. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.officetimeline.yml +0 -0
  132. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.orange-business.mobile.yml +0 -0
  133. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.ovh.fr.yml +0 -0
  134. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.rs-online.fr.yml +0 -0
  135. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.saur.yml +0 -0
  136. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.soyoustart.yml +0 -0
  137. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/com.vinci-autoroutes.yml +0 -0
  138. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/dolibarr.generique.yml +0 -0
  139. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/eu.trainline.yml +0 -0
  140. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.actn.yml +0 -0
  141. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.airfrance.yml +0 -0
  142. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.also.yml +0 -0
  143. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.amazon.yml +0 -0
  144. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.assurance-epargne-pension.yml +0 -0
  145. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.bouyguestelecom.adsl-fiber.yml +0 -0
  146. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.bouyguestelecom.mobile.yml +0 -0
  147. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.butagaz.yml +0 -0
  148. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.chronopost.yml +0 -0
  149. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.dirafi.yml +0 -0
  150. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.domaine-achat.yml +0 -0
  151. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.easytrip.yml +0 -0
  152. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.edf.entreprises.yml +0 -0
  153. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.edf.pme.yml +0 -0
  154. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.finagaz.yml +0 -0
  155. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.fountain.yml +0 -0
  156. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.free.adsl-fiber.yml +0 -0
  157. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.free.mobile.yml +0 -0
  158. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.free.mobile2.yml +0 -0
  159. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.futur.yml +0 -0
  160. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.ge-iroise.yml +0 -0
  161. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.google.yml +0 -0
  162. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.greffe-tc-lyon.yml +0 -0
  163. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.hiscox.yml +0 -0
  164. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.internetsatellite.yml +0 -0
  165. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.jpg.yml +0 -0
  166. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.kubii.yml +0 -0
  167. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.laposte.boutique.yml +0 -0
  168. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.laposte.coliposte.yml +0 -0
  169. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.lecab.yml +0 -0
  170. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.leroymerlin.yml +0 -0
  171. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.maaf.yml +0 -0
  172. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.mediapart.yml +0 -0
  173. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.moneo-resto.yml +0 -0
  174. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.mouser.yml +0 -0
  175. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.mycelium-roulement.yml +0 -0
  176. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.napsis.yml +0 -0
  177. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.nexity.yml +0 -0
  178. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.orange.fibre.yml +0 -0
  179. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.orange.fixedline.yml +0 -0
  180. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.prestaclic.yml +0 -0
  181. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.publicationannoncelegale.yml +0 -0
  182. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.sfr.adsl-fiber.yml +0 -0
  183. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.sfr.mobile.yml +0 -0
  184. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.sosh.yml +0 -0
  185. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.teledec.yml +0 -0
  186. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/fr.topoffice.yml +0 -0
  187. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/net.online.yml +0 -0
  188. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/fr/net.scaleway.yml +0 -0
  189. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.accor.rhine.opco hotels.json +0 -0
  190. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.action.yml +0 -0
  191. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.agrisneltank.json +0 -0
  192. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.albron.yml +0 -0
  193. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.anwb.yml +0 -0
  194. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.argos.json +0 -0
  195. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.be.coolblue.yml +0 -0
  196. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.begra.yml +0 -0
  197. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.blokker.yml +0 -0
  198. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.bouwmans.yml +0 -0
  199. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.bp.yml +0 -0
  200. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.buijtendijk.yml +0 -0
  201. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.bunq.yml +0 -0
  202. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.cpe.yml +0 -0
  203. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.esso_eg_services.yml +0 -0
  204. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.esso_eg_services_v2.yml +0 -0
  205. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.farnell.yml +0 -0
  206. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.fedex.json +0 -0
  207. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.ferbox.yml +0 -0
  208. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.fletcher.yml +0 -0
  209. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.gamma.yml +0 -0
  210. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.goos.yml +0 -0
  211. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.gulf.yml +0 -0
  212. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.ipparking.paleiskwartier.yml +0 -0
  213. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.karwei.yml +0 -0
  214. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.kav.yml +0 -0
  215. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.koffiehenk.yml +0 -0
  216. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.kuwait-q8.json +0 -0
  217. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.makro.json +0 -0
  218. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.marktplaats.json +0 -0
  219. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.megekko.json +0 -0
  220. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.momentsenmore.yml +0 -0
  221. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.ns.invoice.yml +0 -0
  222. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.odido.json +0 -0
  223. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.ok.yml +0 -0
  224. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.parkmobile.yml +0 -0
  225. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.praxis.yml +0 -0
  226. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.reclameland.yml +0 -0
  227. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.saeco.philips.eluscious.yml +0 -0
  228. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.shell_nederland.yml +0 -0
  229. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.shell_schellenkens.yml +0 -0
  230. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.simpel.yml +0 -0
  231. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.tango.json +0 -0
  232. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.total_express.yml +0 -0
  233. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.total_ototol.yml +0 -0
  234. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.total_servauto_ned.json +0 -0
  235. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.transip.yml +0 -0
  236. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.tuynder.yml +0 -0
  237. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.valk.exclusief.hotel.json +0 -0
  238. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.valk.exclusief.restaurant.json +0 -0
  239. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.vistaprint.yml +0 -0
  240. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.vodafone.yml +0 -0
  241. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.wasco.yml +0 -0
  242. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.weid.yml +0 -0
  243. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.yezzer.yml +0 -0
  244. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/nl/nl.zinkunie.yml +0 -0
  245. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/pl/pl.bmw-fs.yml +0 -0
  246. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/pl/pl.insert.subiekt-gt.yml +0 -0
  247. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/pl/pl.insert.subiekt-nexo.yml +0 -0
  248. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/pl/pl.ksef.yml +0 -0
  249. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/pl/pl.orlen.yml +0 -0
  250. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/pl/pl.p4.yml +0 -0
  251. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/extract/templates/pl/pl.paypro.yml +0 -0
  252. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/input/__init__.py +0 -0
  253. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/input/text.py +0 -0
  254. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/output/__init__.py +0 -0
  255. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data/py.typed +0 -0
  256. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data.egg-info/SOURCES.txt +0 -0
  257. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data.egg-info/dependency_links.txt +0 -0
  258. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data.egg-info/entry_points.txt +0 -0
  259. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data.egg-info/requires.txt +0 -0
  260. {invoice2data-0.4.7 → invoice2data-0.5.0}/src/invoice2data.egg-info/top_level.txt +0 -0
  261. {invoice2data-0.4.7 → invoice2data-0.5.0}/tests/test_gvision.py +0 -0
  262. {invoice2data-0.4.7 → invoice2data-0.5.0}/tests/test_main.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: invoice2data
3
- Version: 0.4.7
3
+ Version: 0.5.0
4
4
  Summary: Python parser to extract data from pdf invoice
5
5
  Author: Manuel Riel
6
6
  License: MIT
@@ -12,7 +12,6 @@ Keywords: python,data-mining,accounting,invoice,pdf,parcing
12
12
  Classifier: Programming Language :: Python :: 3
13
13
  Classifier: License :: OSI Approved :: MIT License
14
14
  Classifier: Operating System :: OS Independent
15
- Classifier: Programming Language :: Python :: 3.9
16
15
  Classifier: Programming Language :: Python :: 3.10
17
16
  Classifier: Programming Language :: Python :: 3.11
18
17
  Classifier: Programming Language :: Python :: 3.12
@@ -26,7 +25,7 @@ Classifier: Topic :: Office/Business :: Financial :: Accounting
26
25
  Classifier: Topic :: Office/Business :: Financial
27
26
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
28
27
  Classifier: Development Status :: 5 - Production/Stable
29
- Requires-Python: >=3.9
28
+ Requires-Python: >=3.10
30
29
  Description-Content-Type: text/markdown
31
30
  License-File: LICENSE.md
32
31
  Requires-Dist: click>=8.0.1
@@ -178,7 +177,7 @@ Using in-house templates
178
177
  See `invoice2data/extract/templates` for existing templates. Just extend
179
178
  the list to add your own. If deployed by a bigger organisation, there
180
179
  should be an interface to edit templates for new suppliers. 80-20 rule.
181
- For a short tutorial on how to add new templates, see [tutorial.md](./docs/tutorial.md).
180
+ For a short tutorial on how to add new templates, see the [template creation tutorial][tutorial].
182
181
 
183
182
  Templates are based on Yaml or JSON. They define one or more keywords to find
184
183
  the right template, one or more exclude_keywords to further narrow it down
@@ -237,7 +236,7 @@ It can be installed on most distributions by:
237
236
  ## Development
238
237
 
239
238
  If you are interested in improving this project, have a look at our
240
- [developer guide](./CONTRIBUTING.md) to get you started quickly.
239
+ [contributor guide] to get you started quickly.
241
240
 
242
241
  ## Roadmap and open tasks
243
242
 
@@ -287,3 +286,4 @@ To learn more, see the [Contributor Guide].
287
286
  [license]: https://invoice2data.readthedocs.io/latest/license.html
288
287
  [contributor guide]: https://invoice2data.readthedocs.io/latest/contributing.html
289
288
  [command-line reference]: https://invoice2data.readthedocs.io/latest/usage.html
289
+ [tutorial]: https://invoice2data.readthedocs.io/latest/tutorial.html
@@ -125,7 +125,7 @@ Using in-house templates
125
125
  See `invoice2data/extract/templates` for existing templates. Just extend
126
126
  the list to add your own. If deployed by a bigger organisation, there
127
127
  should be an interface to edit templates for new suppliers. 80-20 rule.
128
- For a short tutorial on how to add new templates, see [tutorial.md](./docs/tutorial.md).
128
+ For a short tutorial on how to add new templates, see the [template creation tutorial][tutorial].
129
129
 
130
130
  Templates are based on Yaml or JSON. They define one or more keywords to find
131
131
  the right template, one or more exclude_keywords to further narrow it down
@@ -184,7 +184,7 @@ It can be installed on most distributions by:
184
184
  ## Development
185
185
 
186
186
  If you are interested in improving this project, have a look at our
187
- [developer guide](./CONTRIBUTING.md) to get you started quickly.
187
+ [contributor guide] to get you started quickly.
188
188
 
189
189
  ## Roadmap and open tasks
190
190
 
@@ -234,3 +234,4 @@ To learn more, see the [Contributor Guide].
234
234
  [license]: https://invoice2data.readthedocs.io/latest/license.html
235
235
  [contributor guide]: https://invoice2data.readthedocs.io/latest/contributing.html
236
236
  [command-line reference]: https://invoice2data.readthedocs.io/latest/usage.html
237
+ [tutorial]: https://invoice2data.readthedocs.io/latest/tutorial.html
@@ -6,16 +6,15 @@ build-backend = "setuptools.build_meta"
6
6
  name = "invoice2data"
7
7
  authors = [{ name = "Manuel Riel" }]
8
8
  description = "Python parser to extract data from pdf invoice"
9
- version = "0.4.7"
9
+ version = "0.5.0"
10
10
  keywords = ["python", "data-mining", "accounting", "invoice", "pdf", "parsing"]
11
11
  license = { text = "MIT" }
12
12
  readme = "README.md"
13
- requires-python = ">=3.9"
13
+ requires-python = ">=3.10"
14
14
  classifiers = [
15
15
  "Programming Language :: Python :: 3",
16
16
  "License :: OSI Approved :: MIT License",
17
17
  "Operating System :: OS Independent",
18
- "Programming Language :: Python :: 3.9",
19
18
  "Programming Language :: Python :: 3.10",
20
19
  "Programming Language :: Python :: 3.11",
21
20
  "Programming Language :: Python :: 3.12",
@@ -64,6 +63,7 @@ docs = [
64
63
  "sphinx-autobuild >=2021.3.14",
65
64
  "sphinx-click >=3.0.2",
66
65
  "sphinx-mermaid >=0.0.7",
66
+ "sphinxcontrib-svg2pdfconverter >=2.1.0",
67
67
  ]
68
68
  mypy = ["mypy >=0.930"]
69
69
  typeguard = ["typeguard >=2.13.3"]
@@ -161,8 +161,6 @@ extend-ignore = [
161
161
  "S101", # use of assert Activate later
162
162
  "S603",
163
163
  "UP031", # Use fstring instead of % identifier
164
- "UP006", # PEP 585 (list vs List): defer typing modernization to its own PR
165
- "UP035", # deprecated typing imports: defer typing modernization to its own PR
166
164
  "E501", # fix this when updating docstrings
167
165
  # "DOC106",
168
166
  # "DOC107",
@@ -1,3 +1,4 @@
1
1
  """Invoice2Data."""
2
2
 
3
+ from .__main__ import Invoice2Data # noqa: F401
3
4
  from .__main__ import extract_data # noqa: F401
@@ -9,10 +9,6 @@ from copy import deepcopy
9
9
  from os.path import join
10
10
  from typing import Any
11
11
  from typing import ClassVar
12
- from typing import Dict
13
- from typing import List
14
- from typing import Optional
15
- from typing import Tuple
16
12
 
17
13
  import click
18
14
 
@@ -110,9 +106,9 @@ if not logger.handlers:
110
106
 
111
107
  def extract_data(
112
108
  invoicefile: str,
113
- templates: Optional[List[InvoiceTemplate]] = None,
109
+ templates: list[InvoiceTemplate] | None = None,
114
110
  input_module: Any = None,
115
- ) -> Dict[str, Any]:
111
+ ) -> dict[str, Any]:
116
112
  """Extracts structured data from PDF/image invoices.
117
113
 
118
114
  This function uses the text extracted from a PDF file or image and
@@ -123,14 +119,14 @@ def extract_data(
123
119
 
124
120
  Args:
125
121
  invoicefile (str): Path of electronic invoice file in PDF, JPEG, PNG
126
- templates (Optional[List[InvoiceTemplate]]): List of instances of class `InvoiceTemplate`.
122
+ templates (list[InvoiceTemplate] | None): List of instances of class `InvoiceTemplate`.
127
123
  Templates are loaded using `read_template` function in `loader.py`.
128
124
  input_module (Any, optional): Library to be used to extract text
129
125
  from the given `invoicefile`.
130
126
  Choices: {'pdftotext', 'pdfminer', 'tesseract', 'text'}.
131
127
 
132
128
  Returns:
133
- Dict[str, Any]: Extracted and matched fields, or False if no template matches.
129
+ dict[str, Any]: Extracted and matched fields, or an empty dict if no template matches.
134
130
 
135
131
  Notes:
136
132
  Import the required `input_module` when using invoice2data as a library.
@@ -152,7 +148,15 @@ def extract_data(
152
148
  elif input_module is None:
153
149
  input_module = text if invoicefile.lower().endswith(".txt") else pdftotext
154
150
 
155
- extracted_str = input_module.to_text(invoicefile)
151
+ try:
152
+ extracted_str = input_module.to_text(invoicefile)
153
+ except Exception:
154
+ logger.exception(
155
+ "Failed to extract text from %s using %s",
156
+ invoicefile,
157
+ input_module.__name__,
158
+ )
159
+ return {}
156
160
  if not isinstance(extracted_str, str) or not extracted_str.strip():
157
161
  logger.error(
158
162
  "Failed to extract text from %s using %s",
@@ -192,14 +196,14 @@ def extract_data(
192
196
 
193
197
  def extract_data_fallback_ocrmypdf(
194
198
  invoicefile: str,
195
- templates: List[InvoiceTemplate],
199
+ templates: list[InvoiceTemplate],
196
200
  input_module: Any,
197
- ) -> Tuple[str, str, List[InvoiceTemplate]]:
201
+ ) -> tuple[str, str, list[InvoiceTemplate]]:
198
202
  logger.debug("Trying OCR extraction with ocrmypdf")
199
203
  extracted_str = ocrmypdf.to_text(invoicefile)
200
204
 
201
205
  # Convert the filter object to a list
202
- templates_matched: List[InvoiceTemplate] = list(
206
+ templates_matched: list[InvoiceTemplate] = list(
203
207
  filter(lambda t: t.matches_input(extracted_str), templates)
204
208
  )
205
209
  templates_matched.sort(key=lambda k: k["priority"], reverse=True)
@@ -211,6 +215,44 @@ def extract_data_fallback_ocrmypdf(
211
215
  return extracted_str, invoicefile, []
212
216
 
213
217
 
218
+ class Invoice2Data:
219
+ """Object-oriented interface around :func:`extract_data`.
220
+
221
+ Holds a reusable set of templates so several invoices can be processed
222
+ without reloading templates each time.
223
+
224
+ Args:
225
+ load_built_in_templates (bool): Load the bundled templates on init.
226
+ Defaults to True.
227
+ """
228
+
229
+ def __init__(self, load_built_in_templates: bool = True) -> None:
230
+ self.templates: list[InvoiceTemplate] = []
231
+ if load_built_in_templates:
232
+ self.templates += read_templates()
233
+
234
+ def read_templates(self, path: str) -> None:
235
+ """Add templates from a user folder to this instance.
236
+
237
+ Args:
238
+ path (str): Folder containing .yml/.json templates to load.
239
+ """
240
+ self.templates += read_templates(os.path.abspath(path))
241
+
242
+ def extract_data(self, path: str, input_module: Any = None) -> dict[str, Any]:
243
+ """Extract data from an invoice using this instance's templates.
244
+
245
+ Args:
246
+ path (str): Path to the invoice file.
247
+ input_module (Any): Text-extraction module to use. Defaults to None
248
+ (auto-detect between text and pdftotext).
249
+
250
+ Returns:
251
+ dict[str, Any]: Extracted fields, or an empty dict if none matched.
252
+ """
253
+ return extract_data(path, self.templates, input_module)
254
+
255
+
214
256
  @click.command()
215
257
  @click.option(
216
258
  "--input-reader",
@@ -268,17 +310,17 @@ def extract_data_fallback_ocrmypdf(
268
310
  )
269
311
  @click.version_option()
270
312
  def main(
271
- input_reader: Optional[str],
313
+ input_reader: str | None,
272
314
  output_format: str,
273
315
  output_date_format: str,
274
316
  output_name: str,
275
317
  debug: bool,
276
- copy: Optional[str],
277
- move: Optional[str],
318
+ copy: str | None,
319
+ move: str | None,
278
320
  filename_format: str,
279
- template_folder: Optional[str],
321
+ template_folder: str | None,
280
322
  exclude_built_in_templates: bool,
281
- input_files: Tuple[Any, ...],
323
+ input_files: tuple[Any, ...],
282
324
  ) -> None:
283
325
  """Extract data from PDF files and output it in a structured format."""
284
326
  if debug:
@@ -315,8 +357,8 @@ def main(
315
357
 
316
358
 
317
359
  def _load_templates(
318
- template_folder: Optional[str], exclude_built_in_templates: bool
319
- ) -> List[Any]:
360
+ template_folder: str | None, exclude_built_in_templates: bool
361
+ ) -> list[Any]:
320
362
  """Load templates from the specified folder."""
321
363
  templates = []
322
364
  if template_folder:
@@ -328,9 +370,9 @@ def _load_templates(
328
370
 
329
371
  def _process_and_move_copy(
330
372
  filename: str,
331
- res: Dict[str, Any],
332
- copy: Optional[str],
333
- move: Optional[str],
373
+ res: dict[str, Any],
374
+ copy: str | None,
375
+ move: str | None,
334
376
  filename_format: str,
335
377
  ) -> None:
336
378
  """Process the extracted data and copy/move the file."""
@@ -5,11 +5,10 @@ Templates are initially read from .yml files and then kept as class.
5
5
 
6
6
  import re
7
7
  import unicodedata
8
+ from collections import OrderedDict as OrderedDictType
8
9
  from logging import getLogger
9
10
  from pprint import pformat
10
11
  from typing import Any
11
- from typing import Dict
12
- from typing import OrderedDict as OrderedDictType
13
12
 
14
13
  import dateparser # type: ignore[import-untyped]
15
14
 
@@ -68,7 +67,7 @@ class InvoiceTemplate(OrderedDictType[str, Any]):
68
67
  super().__init__(*args, **kwargs)
69
68
 
70
69
  # Merge template-specific options with defaults
71
- self.options: Dict[str, Any] = OPTIONS_DEFAULT.copy()
70
+ self.options: dict[str, Any] = OPTIONS_DEFAULT.copy()
72
71
 
73
72
  if "options" in self:
74
73
  self.options.update(self["options"])
@@ -234,7 +233,7 @@ class InvoiceTemplate(OrderedDictType[str, Any]):
234
233
 
235
234
  def extract(
236
235
  self, optimized_str: str, invoice_file: str, input_module: Any
237
- ) -> Dict[str, Any]:
236
+ ) -> dict[str, Any]:
238
237
  """Extracts data from the optimized string using the template.
239
238
 
240
239
  Args:
@@ -243,7 +242,7 @@ class InvoiceTemplate(OrderedDictType[str, Any]):
243
242
  input_module (Any): The input module used.
244
243
 
245
244
  Returns:
246
- Dict[str, Any]: The extracted data.
245
+ dict[str, Any]: The extracted data.
247
246
 
248
247
  """
249
248
  output = _initialize_output_and_log(self, optimized_str)
@@ -274,7 +273,7 @@ class InvoiceTemplate(OrderedDictType[str, Any]):
274
273
 
275
274
  def _initialize_output_and_log(
276
275
  self: InvoiceTemplate, optimized_str: str
277
- ) -> Dict[str, Any]:
276
+ ) -> dict[str, Any]:
278
277
  """Initialize the output dictionary and log debug information."""
279
278
  logger.debug("START optimized_str ========================\n" + optimized_str)
280
279
  logger.debug("END optimized_str ==========================")
@@ -296,7 +295,7 @@ def _initialize_output_and_log(
296
295
 
297
296
  def _handle_area(
298
297
  self: InvoiceTemplate,
299
- v: Dict[str, Any],
298
+ v: dict[str, Any],
300
299
  input_module: Any,
301
300
  invoice_file: str,
302
301
  optimized_str: str,
@@ -317,9 +316,9 @@ def _handle_area(
317
316
  def _handle_parser(
318
317
  self: InvoiceTemplate,
319
318
  k: str,
320
- v: Dict[str, Any],
319
+ v: dict[str, Any],
321
320
  optimized_str_for_parser: str,
322
- output: Dict[str, Any],
321
+ output: dict[str, Any],
323
322
  ) -> None:
324
323
  """Handle parsing using different parsers."""
325
324
  if v["parser"] in PARSERS_MAPPING:
@@ -334,7 +333,7 @@ def _handle_parser(
334
333
 
335
334
 
336
335
  def _handle_legacy_syntax(
337
- self: InvoiceTemplate, k: str, v: Any, optimized_str: str, output: Dict[str, Any]
336
+ self: InvoiceTemplate, k: str, v: Any, optimized_str: str, output: dict[str, Any]
338
337
  ) -> None:
339
338
  """Handle legacy syntax for backward compatibility."""
340
339
  result = None
@@ -361,8 +360,8 @@ def _handle_legacy_syntax(
361
360
 
362
361
 
363
362
  def _check_required_fields(
364
- self: InvoiceTemplate, output: Dict[str, Any]
365
- ) -> Dict[str, Any]:
363
+ self: InvoiceTemplate, output: dict[str, Any]
364
+ ) -> dict[str, Any]:
366
365
  """Check if all required fields are present in the output."""
367
366
  if "required_fields" not in self.keys():
368
367
  required_fields = ["date", "amount", "invoice_number", "issuer"]
@@ -6,12 +6,9 @@ Templates are initially read from .yml or .json files and then kept as class.
6
6
  import codecs
7
7
  import json
8
8
  import os
9
+ from collections.abc import Callable
9
10
  from logging import getLogger
10
11
  from typing import Any
11
- from typing import Callable
12
- from typing import Dict
13
- from typing import List
14
- from typing import Optional
15
12
  from typing import cast
16
13
 
17
14
 
@@ -32,7 +29,7 @@ logger = getLogger(__name__)
32
29
 
33
30
  def ordered_load(
34
31
  stream: str, loader: Callable[[str], Any] = json.loads
35
- ) -> List[InvoiceTemplate]:
32
+ ) -> list[InvoiceTemplate]:
36
33
  """Loads a stream of JSON data.
37
34
 
38
35
  Args:
@@ -40,7 +37,7 @@ def ordered_load(
40
37
  loader (Callable[[str], Any], optional): JSON loader function. Defaults to json.loads.
41
38
 
42
39
  Returns:
43
- List[InvoiceTemplate]: List of InvoiceTemplate objects.
40
+ list[InvoiceTemplate]: List of InvoiceTemplate objects.
44
41
  """
45
42
  output = []
46
43
 
@@ -54,22 +51,22 @@ def ordered_load(
54
51
  for tpl in tpl_stream:
55
52
  tpl = prepare_template(tpl)
56
53
  if tpl:
57
- output.append(InvoiceTemplate(cast(Dict[str, Any], tpl)))
54
+ output.append(InvoiceTemplate(cast(dict[str, Any], tpl)))
58
55
 
59
56
  return output
60
57
 
61
58
 
62
- def read_templates(folder: Optional[str] = None) -> List[InvoiceTemplate]:
59
+ def read_templates(folder: str | None = None) -> list[InvoiceTemplate]:
63
60
  """Load YAML templates from template folder. Return list of dicts.
64
61
 
65
62
  Use built-in templates if no folder is set.
66
63
 
67
64
  Args:
68
- folder (Optional[str]): User-defined folder where templates are stored.
65
+ folder (str | None): User-defined folder where templates are stored.
69
66
  If None, uses built-in templates.
70
67
 
71
68
  Returns:
72
- List[InvoiceTemplate]: List of InvoiceTemplate objects.
69
+ list[InvoiceTemplate]: List of InvoiceTemplate objects.
73
70
 
74
71
  Examples:
75
72
  >>> templates = read_templates("./src/invoice2data/extract/templates/au")
@@ -109,20 +106,20 @@ def read_templates(folder: Optional[str] = None) -> List[InvoiceTemplate]:
109
106
  tpl = prepare_template(tpl)
110
107
 
111
108
  if tpl:
112
- output.append(InvoiceTemplate(cast(Dict[str, Any], tpl)))
109
+ output.append(InvoiceTemplate(cast(dict[str, Any], tpl)))
113
110
 
114
111
  logger.info("Loaded %d templates from %s", len(output), folder)
115
112
  return output
116
113
 
117
114
 
118
- def prepare_template(tpl: Dict[str, Any]) -> Optional[Dict[str, Any]]:
115
+ def prepare_template(tpl: dict[str, Any]) -> dict[str, Any] | None:
119
116
  """Prepare a template for use.
120
117
 
121
118
  Args:
122
- tpl (Dict[str, Any]): Template dictionary.
119
+ tpl (dict[str, Any]): Template dictionary.
123
120
 
124
121
  Returns:
125
- Optional[Dict[str, Any]]: Processed template dictionary.
122
+ dict[str, Any] | None: Processed template dictionary.
126
123
  """
127
124
  # Test if all required fields are in template
128
125
  if "keywords" not in tpl:
@@ -5,12 +5,8 @@ Initial work and maintenance by Holger Brunn @hbrunn
5
5
 
6
6
  import re
7
7
  from logging import getLogger
8
+ from re import Match
8
9
  from typing import Any
9
- from typing import Dict
10
- from typing import List
11
- from typing import Match
12
- from typing import Optional
13
- from typing import Union
14
10
 
15
11
 
16
12
  # from ..invoice_template import InvoiceTemplate # type: ignore[unused-ignore]
@@ -20,7 +16,7 @@ logger = getLogger(__name__)
20
16
  DEFAULT_OPTIONS = {"line_separator": r"\n"}
21
17
 
22
18
 
23
- def parse_line(patterns: Union[str, List[str]], line: str) -> Optional[Match[str]]:
19
+ def parse_line(patterns: str | list[str], line: str) -> Match[str] | None:
24
20
  """Parse a line using a given pattern or list of patterns.
25
21
 
26
22
  This function searches for a match in the given line using the provided
@@ -28,11 +24,11 @@ def parse_line(patterns: Union[str, List[str]], line: str) -> Optional[Match[str
28
24
  object; otherwise, it returns None.
29
25
 
30
26
  Args:
31
- patterns (Union[str, List[str]]): The pattern(s) to search for.
27
+ patterns (str | list[str]): The pattern(s) to search for.
32
28
  line (str): The line to parse.
33
29
 
34
30
  Returns:
35
- Optional[Match[str]]: A match object if a match is found, otherwise None.
31
+ Match[str] | None: A match object if a match is found, otherwise None.
36
32
  """
37
33
  patterns = patterns if isinstance(patterns, list) else [patterns]
38
34
  for pattern in patterns:
@@ -43,11 +39,11 @@ def parse_line(patterns: Union[str, List[str]], line: str) -> Optional[Match[str
43
39
 
44
40
 
45
41
  def parse_block( # noqa: RUF100 C901
46
- template: Dict[str, Any],
42
+ template: dict[str, Any],
47
43
  field: str,
48
- settings: Dict[str, Any],
44
+ settings: dict[str, Any],
49
45
  content: str,
50
- ) -> List[Dict[str, Any]]:
46
+ ) -> list[dict[str, Any]]:
51
47
  """Parse a block of lines to extract data.
52
48
 
53
49
  This function parses a block of lines from an invoice to extract data
@@ -56,13 +52,13 @@ def parse_block( # noqa: RUF100 C901
56
52
  based on the configuration.
57
53
 
58
54
  Args:
59
- template (Dict[str, Any]): The template containing extraction rules.
55
+ template (dict[str, Any]): The template containing extraction rules.
60
56
  field (str): The name of the field to extract.
61
- settings (Dict[str, Any]): The settings for the extraction rule.
57
+ settings (dict[str, Any]): The settings for the extraction rule.
62
58
  content (str): The text content to parse.
63
59
 
64
60
  Returns:
65
- List[Dict[str, Any]]: A list of dictionaries, where each dictionary
61
+ list[dict[str, Any]]: A list of dictionaries, where each dictionary
66
62
  represents an extracted row with field-value pairs.
67
63
  """
68
64
  # Validate settings
@@ -72,8 +68,8 @@ def parse_block( # noqa: RUF100 C901
72
68
 
73
69
  logger.debug("START lines block content ========================\n%s", content)
74
70
  logger.debug("END lines block content ==========================")
75
- lines: List[Dict[str, Any]] = []
76
- current_row: Dict[str, Any] = {}
71
+ lines: list[dict[str, Any]] = []
72
+ current_row: dict[str, Any] = {}
77
73
 
78
74
  # We assume that structured line fields may either be individual lines or
79
75
  # they may be main line items with descriptions or details following beneath.
@@ -167,21 +163,21 @@ def parse_block( # noqa: RUF100 C901
167
163
 
168
164
 
169
165
  def parse_by_rule(
170
- template: Dict[str, Any],
166
+ template: dict[str, Any],
171
167
  field: str,
172
- rule: Dict[str, Any],
168
+ rule: dict[str, Any],
173
169
  content: str,
174
- ) -> List[Dict[str, Any]]:
170
+ ) -> list[dict[str, Any]]:
175
171
  """Parse lines from a block of text based on a rule.
176
172
 
177
173
  Args:
178
- template (Dict[str, Any]): The template dictionary.
174
+ template (dict[str, Any]): The template dictionary.
179
175
  field (str): The field name.
180
- rule (Dict[str, Any]): The rule dictionary.
176
+ rule (dict[str, Any]): The rule dictionary.
181
177
  content (str): The text content to parse.
182
178
 
183
179
  Returns:
184
- List[Dict[str, Any]]: The parsed lines.
180
+ list[dict[str, Any]]: The parsed lines.
185
181
  """
186
182
  # First apply default options.
187
183
  settings = DEFAULT_OPTIONS.copy()
@@ -227,21 +223,21 @@ def parse_by_rule(
227
223
 
228
224
 
229
225
  def parse(
230
- template: Dict[str, Any],
226
+ template: dict[str, Any],
231
227
  field: str,
232
- settings: Dict[str, Any],
228
+ settings: dict[str, Any],
233
229
  content: str,
234
- ) -> List[Dict[str, Any]]:
230
+ ) -> list[dict[str, Any]]:
235
231
  """Parse lines from the content based on the given settings.
236
232
 
237
233
  Args:
238
- template (Dict[str, Any]): The template dictionary.
234
+ template (dict[str, Any]): The template dictionary.
239
235
  field (str): The field name.
240
- settings (Dict[str, Any]): The settings dictionary.
236
+ settings (dict[str, Any]): The settings dictionary.
241
237
  content (str): The text content to parse.
242
238
 
243
239
  Returns:
244
- List[Dict[str, Any]]: The parsed lines.
240
+ list[dict[str, Any]]: The parsed lines.
245
241
  """
246
242
  if "rules" in settings:
247
243
  # One field can have multiple sets of line-parsing rules
@@ -262,16 +258,16 @@ def parse(
262
258
 
263
259
 
264
260
  def parse_current_row(
265
- match: Optional[Match[str]], current_row: Dict[str, Any]
266
- ) -> Dict[str, Any]:
261
+ match: Match[str] | None, current_row: dict[str, Any]
262
+ ) -> dict[str, Any]:
267
263
  """Parse the current row data.
268
264
 
269
265
  Args:
270
- match (Optional[Match[str]]): The match object.
271
- current_row (Dict[str, Any]): The current row dictionary.
266
+ match (Match[str] | None): The match object.
267
+ current_row (dict[str, Any]): The current row dictionary.
272
268
 
273
269
  Returns:
274
- Dict[str, Any]: The updated current row dictionary.
270
+ dict[str, Any]: The updated current row dictionary.
275
271
  """
276
272
  if match:
277
273
  for field, value in match.groupdict().items():
@@ -15,9 +15,6 @@ import logging
15
15
  import re
16
16
  from collections import OrderedDict
17
17
  from typing import Any
18
- from typing import Dict
19
- from typing import List
20
- from typing import Optional
21
18
 
22
19
  from ..utils import _apply_grouping
23
20
 
@@ -28,7 +25,7 @@ logger = logging.getLogger(__name__)
28
25
  def parse(
29
26
  template: Any,
30
27
  field: str,
31
- settings: Dict[str, Any],
28
+ settings: dict[str, Any],
32
29
  content: str,
33
30
  legacy: bool = False,
34
31
  ) -> Any:
@@ -37,7 +34,7 @@ def parse(
37
34
  Args:
38
35
  template (Any): The template object.
39
36
  field (str): The name of the field to extract.
40
- settings (Dict[str, Any]): The settings for the field extraction.
37
+ settings (dict[str, Any]): The settings for the field extraction.
41
38
  content (str): The text content to parse.
42
39
  legacy (bool, optional): Whether to use legacy parsing. Defaults to False.
43
40
 
@@ -64,7 +61,7 @@ def parse(
64
61
  return result
65
62
 
66
63
 
67
- def _extract_matches(settings: Dict[str, Any], content: str) -> Optional[List[Any]]:
64
+ def _extract_matches(settings: dict[str, Any], content: str) -> list[Any] | None:
68
65
  """Extract matches from the content using the given regexes."""
69
66
  if isinstance(settings["regex"], list):
70
67
  regexes = settings["regex"]
@@ -100,8 +97,8 @@ def _extract_matches(settings: Dict[str, Any], content: str) -> Optional[List[An
100
97
 
101
98
 
102
99
  def _apply_type_coercion(
103
- template: Any, settings: Dict[str, Any], result: List[Any]
104
- ) -> List[Any]:
100
+ template: Any, settings: dict[str, Any], result: list[Any]
101
+ ) -> list[Any]:
105
102
  """Apply type coercion to the extracted values."""
106
103
  if "type" in settings:
107
104
  for k, v in enumerate(result):
@@ -109,7 +106,7 @@ def _apply_type_coercion(
109
106
  return result
110
107
 
111
108
 
112
- def _remove_duplicates(legacy: bool, result: Optional[Any]) -> Optional[Any]:
109
+ def _remove_duplicates(legacy: bool, result: Any | None) -> Any | None:
113
110
  """Remove duplicate values from the result."""
114
111
  if isinstance(result, list):
115
112
  if legacy: