polytext 0.2.2b1__tar.gz → 0.2.2b2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. {polytext-0.2.2b1 → polytext-0.2.2b2}/PKG-INFO +1 -1
  2. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/loader/base.py +8 -0
  3. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext.egg-info/PKG-INFO +1 -1
  4. {polytext-0.2.2b1 → polytext-0.2.2b2}/setup.py +1 -1
  5. {polytext-0.2.2b1 → polytext-0.2.2b2}/LICENSE +0 -0
  6. {polytext-0.2.2b1 → polytext-0.2.2b2}/README.md +0 -0
  7. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/__init__.py +0 -0
  8. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/converter/__init__.py +0 -0
  9. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/converter/audio_to_text.py +0 -0
  10. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/converter/base.py +0 -0
  11. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/converter/document_ocr_to_text.py +0 -0
  12. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/converter/document_ocr_to_text_azure_oai.py +0 -0
  13. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/converter/gemini_quality_guards.py +0 -0
  14. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/converter/html_to_md.py +0 -0
  15. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/converter/md_to_text.py +0 -0
  16. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/converter/ocr_to_text.py +0 -0
  17. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/converter/ocr_to_text_azure_oai.py +0 -0
  18. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/converter/pdf.py +0 -0
  19. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/converter/text_to_md.py +0 -0
  20. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/converter/video_to_audio.py +0 -0
  21. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/exceptions/__init__.py +0 -0
  22. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/exceptions/base.py +0 -0
  23. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/generator/__init__.py +0 -0
  24. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/generator/pdf.py +0 -0
  25. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/loader/__init__.py +0 -0
  26. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/loader/audio.py +0 -0
  27. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/loader/document.py +0 -0
  28. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/loader/document_ocr.py +0 -0
  29. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/loader/downloader/__init__.py +0 -0
  30. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/loader/downloader/downloader.py +0 -0
  31. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/loader/html.py +0 -0
  32. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/loader/markdown.py +0 -0
  33. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/loader/notebook.py +0 -0
  34. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/loader/ocr.py +0 -0
  35. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/loader/plain_text.py +0 -0
  36. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/loader/video.py +0 -0
  37. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/loader/xml_xbrl.py +0 -0
  38. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/loader/youtube.py +0 -0
  39. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/loader/youtube_llm.py +0 -0
  40. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/processor/__init__.py +0 -0
  41. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/processor/audio_chunker.py +0 -0
  42. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/processor/text_merger.py +0 -0
  43. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/processor/transcript_chunker.py +0 -0
  44. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/prompts/__init__.py +0 -0
  45. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/prompts/ocr.py +0 -0
  46. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/prompts/text_merging.py +0 -0
  47. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/prompts/text_to_md.py +0 -0
  48. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/prompts/transcription.py +0 -0
  49. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/utils/__init__.py +0 -0
  50. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext/utils/utils.py +0 -0
  51. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext.egg-info/SOURCES.txt +0 -0
  52. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext.egg-info/dependency_links.txt +0 -0
  53. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext.egg-info/not-zip-safe +0 -0
  54. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext.egg-info/requires.txt +0 -0
  55. {polytext-0.2.2b1 → polytext-0.2.2b2}/polytext.egg-info/top_level.txt +0 -0
  56. {polytext-0.2.2b1 → polytext-0.2.2b2}/pyproject.toml +0 -0
  57. {polytext-0.2.2b1 → polytext-0.2.2b2}/setup.cfg +0 -0
  58. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_audio_chunker.py +0 -0
  59. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_audio_comparison_helpers.py +0 -0
  60. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_audio_transcription_model_migration.py +0 -0
  61. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_compare_audio_models.py +0 -0
  62. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_compare_document_ocr_to_text_models.py +0 -0
  63. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_compare_ocr_to_text_models.py +0 -0
  64. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_compare_youtube_models.py +0 -0
  65. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_dowload_audio_from_youtube.py +0 -0
  66. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_dowload_audio_from_youtube_helpers.py +0 -0
  67. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_extracted_text_whitespace.py +0 -0
  68. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_get_audio_transcript_from_gcs.py +0 -0
  69. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_get_customized_pdf_from_markdown.py +0 -0
  70. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_get_document_ocr.py +0 -0
  71. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_get_document_ocr_azure_oai.py +0 -0
  72. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_get_document_text.py +0 -0
  73. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_get_document_text_from_gcs.py +0 -0
  74. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_get_ocr_from_image.py +0 -0
  75. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_get_text_from_markdown.py +0 -0
  76. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_get_video_transcript_from_gcs.py +0 -0
  77. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_library.py +0 -0
  78. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_markdown_loader_gzip.py +0 -0
  79. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_markitdown_html.py +0 -0
  80. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_notebook_loader.py +0 -0
  81. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_ocr_fallbacks.py +0 -0
  82. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_ocr_image_descriptions.py +0 -0
  83. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_pain_text.py +0 -0
  84. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_split_audio_with_llm.py +0 -0
  85. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_xml_xbrl_loader.py +0 -0
  86. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_youtube_gemini_minimal_check.py +0 -0
  87. {polytext-0.2.2b1 → polytext-0.2.2b2}/tests/test_youtube_transcript.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: polytext
3
- Version: 0.2.2b1
3
+ Version: 0.2.2b2
4
4
  Summary: Python utilities to simplify document files management
5
5
  Home-page: https://github.com/docsity/polytext
6
6
  Author: Matteo Senardi
@@ -317,6 +317,14 @@ class BaseLoader:
317
317
  return YoutubeTranscriptLoaderWithLlm(llm_api_key=llm_api_key, markdown_output=self.markdown_output, temp_dir=self.temp_dir, timeout_minutes=self.timeout_minutes, **kwargs)
318
318
  else:
319
319
  return HtmlLoader(markdown_output=self.markdown_output)
320
+ if file_extension in [".md", ".markdown"] or (
321
+ mime_type and mime_type.startswith("text/markdown")
322
+ ):
323
+ return MarkdownLoader(
324
+ markdown_output=self.markdown_output,
325
+ temp_dir=self.temp_dir,
326
+ **kwargs,
327
+ )
320
328
  elif mime_type:
321
329
  if file_extension in [".pdf", ".xlsx", ".docx", ".txt", ".csv", ".odt", ".pptx", ".xls", ".doc", ".ppt", ".rtf"]:
322
330
  return DocumentLoader(markdown_output=self.markdown_output, temp_dir=self.temp_dir, timeout_minutes=self.timeout_minutes, **kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: polytext
3
- Version: 0.2.2b1
3
+ Version: 0.2.2b2
4
4
  Summary: Python utilities to simplify document files management
5
5
  Home-page: https://github.com/docsity/polytext
6
6
  Author: Matteo Senardi
@@ -51,7 +51,7 @@ def get_requirements(*requirements_file):
51
51
 
52
52
  setup(
53
53
  name='polytext',
54
- version='0.2.2b1',
54
+ version='0.2.2b2',
55
55
  url='https://github.com/docsity/polytext',
56
56
  # download_url='https://github.com/pualien/py-polytext/archive/0.1.23.tar.gz',
57
57
  license='MIT',
File without changes
File without changes
File without changes
File without changes