navexOCR 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: navexOCR
3
+ Version: 1.0.0
4
+ Summary: AI OCR PDF to Word Conversion Engine
5
+ Author: Naveen S
6
+ Requires-Dist: fastapi
7
+ Requires-Dist: uvicorn
8
+ Requires-Dist: python-multipart
9
+ Requires-Dist: pymupdf
10
+ Requires-Dist: pillow
11
+ Requires-Dist: paddleocr==2.7.0.3
12
+ Requires-Dist: paddlepaddle==2.6.2
13
+ Requires-Dist: pywin32
14
+ Requires-Dist: numpy==1.26.4
15
+ Requires-Dist: pyfiglet
16
+ Dynamic: author
17
+ Dynamic: requires-dist
18
+ Dynamic: summary
File without changes
@@ -0,0 +1 @@
1
+ from navexOCR.api import convert_pdf_file
@@ -0,0 +1,47 @@
1
+ import os
2
+ import uuid
3
+
4
+ from navexOCR.services.ocr_service import (
5
+ create_searchable_pdf
6
+ )
7
+
8
+ from navexOCR.services.word_service import (
9
+ pdf_to_word
10
+ )
11
+
12
+ from navexOCR.config import TEMP_DIR
13
+
14
+
def convert_pdf_file(input_pdf):
    """Convert a PDF into a Word document and return the output path.

    Every call gets its own uniquely named working directory under
    ``TEMP_DIR``. The input is first handed to ``create_searchable_pdf``,
    which is given ``searchable.pdf`` in that directory as its target; that
    file is then passed to ``pdf_to_word`` together with the target
    ``output.docx`` and the working directory itself.

    Args:
        input_pdf: Source PDF, forwarded unchanged to
            ``create_searchable_pdf``.

    Returns:
        Path of ``output.docx`` inside the per-call working directory. The
        directory is left in place on success so the caller can read the
        result; removing it afterwards is the caller's responsibility.

    Raises:
        Whatever ``create_searchable_pdf`` or ``pdf_to_word`` raise. The
        working directory is removed before the exception propagates so
        failed jobs do not accumulate under ``TEMP_DIR``.
    """
    import shutil  # local import: only needed on the failure path

    work_dir = os.path.join(TEMP_DIR, str(uuid.uuid4()))
    os.makedirs(work_dir, exist_ok=True)

    searchable_pdf = os.path.join(work_dir, "searchable.pdf")
    output_docx = os.path.join(work_dir, "output.docx")

    try:
        create_searchable_pdf(input_pdf, searchable_pdf)
        pdf_to_word(searchable_pdf, output_docx, work_dir)
    except BaseException:
        # Nothing usable was produced; drop the partial job directory and
        # let the original error reach the caller untouched.
        shutil.rmtree(work_dir, ignore_errors=True)
        raise

    return output_docx
@@ -0,0 +1,38 @@
"""Filesystem locations used by navexOCR: working directories and model paths."""

import os
import tempfile

# Parent of the directory that contains this file; all default paths are
# derived from it.
BASE_DIR = os.path.dirname(
    os.path.dirname(os.path.abspath(__file__))
)


def _ensure_dir(preferred, fallback_name):
    """Create ``preferred`` and return it, or fall back to the temp dir.

    The preferred locations live next to the installed code, which may be
    read-only (for example a system-wide site-packages). Creating them runs
    at import time, so a failure there would make the whole package
    un-importable. When the preferred directory cannot be created, a
    directory named ``fallback_name`` under ``<system temp>/navexOCR`` is
    created and returned instead.

    Args:
        preferred: Directory to create and use when possible.
        fallback_name: Leaf directory name used under the system temp dir.

    Returns:
        Path of the directory that now exists.

    Raises:
        OSError: If neither the preferred nor the fallback directory can be
            created.
    """
    try:
        os.makedirs(preferred, exist_ok=True)
    except OSError:
        fallback = os.path.join(
            tempfile.gettempdir(), "navexOCR", fallback_name
        )
        os.makedirs(fallback, exist_ok=True)
        return fallback
    return preferred


# Scratch space for per-job working directories.
TEMP_DIR = _ensure_dir(os.path.join(BASE_DIR, "navexOCR", "temp"), "temp")

# Directory created alongside TEMP_DIR at import time.
OUTPUT_DIR = _ensure_dir(os.path.join(BASE_DIR, "outputs"), "outputs")

# Model directories are only computed here, never created.
DET_MODEL_DIR = os.path.join(BASE_DIR, "models", "det")
REC_MODEL_DIR = os.path.join(BASE_DIR, "models", "rec")
CLS_MODEL_DIR = os.path.join(BASE_DIR, "models", "cls")
@@ -0,0 +1,32 @@
1
+ from fastapi import FastAPI
2
+
3
+ from navexOCR.routes.convert import router
4
+
5
+ import pyfiglet
6
+
7
+
# ASCII-art banner, rendered once when the module is imported and echoed
# to stdout.
banner = pyfiglet.figlet_format("navexOCR", font="slant")
print(banner)

# Application object; the routes from navexOCR.routes.convert are mounted
# on it directly.
app = FastAPI(title="navexOCR API", version="1.0.0")
app.include_router(router)
24
+
25
+
@app.get("/")
def home():
    """Return a fixed status payload for the root endpoint."""
    status = {"message": "navexOCR API Running"}
    return status
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: navexOCR
3
+ Version: 1.0.0
4
+ Summary: AI OCR PDF to Word Conversion Engine
5
+ Author: Naveen S
6
+ Requires-Dist: fastapi
7
+ Requires-Dist: uvicorn
8
+ Requires-Dist: python-multipart
9
+ Requires-Dist: pymupdf
10
+ Requires-Dist: pillow
11
+ Requires-Dist: paddleocr==2.7.0.3
12
+ Requires-Dist: paddlepaddle==2.6.2
13
+ Requires-Dist: pywin32
14
+ Requires-Dist: numpy==1.26.4
15
+ Requires-Dist: pyfiglet
16
+ Dynamic: author
17
+ Dynamic: requires-dist
18
+ Dynamic: summary
@@ -0,0 +1,12 @@
1
+ MANIFEST.in
2
+ README.md
3
+ setup.py
4
+ navexOCR/__init__.py
5
+ navexOCR/api.py
6
+ navexOCR/config.py
7
+ navexOCR/main.py
8
+ navexOCR.egg-info/PKG-INFO
9
+ navexOCR.egg-info/SOURCES.txt
10
+ navexOCR.egg-info/dependency_links.txt
11
+ navexOCR.egg-info/requires.txt
12
+ navexOCR.egg-info/top_level.txt
@@ -0,0 +1,10 @@
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ pymupdf
5
+ pillow
6
+ paddleocr==2.7.0.3
7
+ paddlepaddle==2.6.2
8
+ pywin32
9
+ numpy==1.26.4
10
+ pyfiglet
@@ -0,0 +1 @@
1
+ navexOCR
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,30 @@
1
+ from setuptools import setup, find_packages
2
+
# Package definition for navexOCR.
setup(
    name="navexOCR",
    version="1.0.0",
    author="Naveen S",
    description="AI OCR PDF to Word Conversion Engine",
    # NOTE(review): find_packages() only returns directories that contain an
    # __init__.py. The sdist file list shows no navexOCR/services or
    # navexOCR/routes modules although the package imports from both --
    # confirm those directories exist and have __init__.py files.
    packages=find_packages(),
    include_package_data=True,
    install_requires=[
        "fastapi",
        "uvicorn",
        "python-multipart",
        "pymupdf",
        "pillow",
        "paddleocr==2.7.0.3",
        "paddlepaddle==2.6.2",
        # pywin32 is only installable on Windows; the environment marker
        # keeps `pip install` from failing on other platforms.
        "pywin32; sys_platform == 'win32'",
        "numpy==1.26.4",
        "pyfiglet",
    ],
)