pdfprep 0.1.2__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pdfprep-0.1.2 → pdfprep-0.2.0}/PKG-INFO +36 -14
- {pdfprep-0.1.2 → pdfprep-0.2.0}/README.md +14 -0
- pdfprep-0.2.0/pyproject.toml +88 -0
- {pdfprep-0.1.2 → pdfprep-0.2.0}/src/pdfprep/__init__.py +1 -1
- {pdfprep-0.1.2 → pdfprep-0.2.0}/src/pdfprep.egg-info/PKG-INFO +36 -14
- pdfprep-0.2.0/src/pdfprep.egg-info/requires.txt +44 -0
- pdfprep-0.1.2/pyproject.toml +0 -65
- pdfprep-0.1.2/src/pdfprep.egg-info/requires.txt +0 -26
- {pdfprep-0.1.2 → pdfprep-0.2.0}/setup.cfg +0 -0
- {pdfprep-0.1.2 → pdfprep-0.2.0}/src/pdfprep/metadata.py +0 -0
- {pdfprep-0.1.2 → pdfprep-0.2.0}/src/pdfprep/ocr.py +0 -0
- {pdfprep-0.1.2 → pdfprep-0.2.0}/src/pdfprep/parsing.py +0 -0
- {pdfprep-0.1.2 → pdfprep-0.2.0}/src/pdfprep/table.py +0 -0
- {pdfprep-0.1.2 → pdfprep-0.2.0}/src/pdfprep.egg-info/SOURCES.txt +0 -0
- {pdfprep-0.1.2 → pdfprep-0.2.0}/src/pdfprep.egg-info/dependency_links.txt +0 -0
- {pdfprep-0.1.2 → pdfprep-0.2.0}/src/pdfprep.egg-info/entry_points.txt +0 -0
- {pdfprep-0.1.2 → pdfprep-0.2.0}/src/pdfprep.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pdfprep
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: PDF 전처리 통합 도구 — 메타데이터, 텍스트 파싱, OCR, 표 추출
|
|
5
5
|
Author-email: uwpark <uwpark@simplatform.com>
|
|
6
6
|
License: MIT
|
|
@@ -8,8 +8,10 @@ Project-URL: Homepage, https://pypi.org/project/pdfprep/
|
|
|
8
8
|
Keywords: pdf,ocr,table-extraction,preprocessing,parsing,metadata
|
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
|
10
10
|
Classifier: Programming Language :: Python :: 3.12
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
11
13
|
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
-
Classifier: Operating System :: POSIX :: Linux
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
13
15
|
Classifier: Topic :: Text Processing
|
|
14
16
|
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
15
17
|
Classifier: Natural Language :: Korean
|
|
@@ -19,25 +21,31 @@ Requires-Dist: pypdf<6.0,>=4.0
|
|
|
19
21
|
Requires-Dist: pdfplumber>=0.11.0
|
|
20
22
|
Requires-Dist: pymupdf>=1.27.0
|
|
21
23
|
Requires-Dist: pillow>=10.0
|
|
22
|
-
Requires-Dist: numpy<2
|
|
23
|
-
Requires-Dist: setuptools>=65
|
|
24
24
|
Provides-Extra: ocr
|
|
25
25
|
Requires-Dist: pytesseract>=0.3.13; extra == "ocr"
|
|
26
|
-
Requires-Dist: paddleocr==2.7.3; extra == "ocr"
|
|
27
|
-
Requires-Dist: paddlepaddle==2.6.2; extra == "ocr"
|
|
26
|
+
Requires-Dist: paddleocr==2.7.3; python_full_version < "3.13" and extra == "ocr"
|
|
27
|
+
Requires-Dist: paddlepaddle==2.6.2; python_full_version < "3.13" and extra == "ocr"
|
|
28
|
+
Requires-Dist: numpy<2; python_full_version < "3.13" and extra == "ocr"
|
|
29
|
+
Requires-Dist: numpy<3,>=2; python_full_version >= "3.13" and extra == "ocr"
|
|
30
|
+
Requires-Dist: setuptools>=65; python_full_version < "3.13" and extra == "ocr"
|
|
28
31
|
Provides-Extra: table
|
|
29
|
-
Requires-Dist: camelot-py[base]==1.0.9; extra == "table"
|
|
32
|
+
Requires-Dist: camelot-py==1.0.9; extra == "table"
|
|
30
33
|
Requires-Dist: tabula-py>=2.10; extra == "table"
|
|
31
|
-
Requires-Dist: jpype1>=1.5; extra == "table"
|
|
32
|
-
Requires-Dist: layoutparser>=0.3.4; extra == "table"
|
|
34
|
+
Requires-Dist: jpype1>=1.5; python_full_version < "3.13" and extra == "table"
|
|
35
|
+
Requires-Dist: layoutparser>=0.3.4; python_full_version < "3.13" and extra == "table"
|
|
36
|
+
Requires-Dist: numpy<2; python_full_version < "3.13" and extra == "table"
|
|
37
|
+
Requires-Dist: numpy<3,>=2; python_full_version >= "3.13" and extra == "table"
|
|
33
38
|
Provides-Extra: all
|
|
34
39
|
Requires-Dist: pytesseract>=0.3.13; extra == "all"
|
|
35
|
-
Requires-Dist: paddleocr==2.7.3; extra == "all"
|
|
36
|
-
Requires-Dist: paddlepaddle==2.6.2; extra == "all"
|
|
37
|
-
Requires-Dist: camelot-py[base]==1.0.9; extra == "all"
|
|
40
|
+
Requires-Dist: paddleocr==2.7.3; python_full_version < "3.13" and extra == "all"
|
|
41
|
+
Requires-Dist: paddlepaddle==2.6.2; python_full_version < "3.13" and extra == "all"
|
|
42
|
+
Requires-Dist: camelot-py==1.0.9; extra == "all"
|
|
38
43
|
Requires-Dist: tabula-py>=2.10; extra == "all"
|
|
39
|
-
Requires-Dist: jpype1>=1.5; extra == "all"
|
|
40
|
-
Requires-Dist: layoutparser>=0.3.4; extra == "all"
|
|
44
|
+
Requires-Dist: jpype1>=1.5; python_full_version < "3.13" and extra == "all"
|
|
45
|
+
Requires-Dist: layoutparser>=0.3.4; python_full_version < "3.13" and extra == "all"
|
|
46
|
+
Requires-Dist: numpy<2; python_full_version < "3.13" and extra == "all"
|
|
47
|
+
Requires-Dist: numpy<3,>=2; python_full_version >= "3.13" and extra == "all"
|
|
48
|
+
Requires-Dist: setuptools>=65; python_full_version < "3.13" and extra == "all"
|
|
41
49
|
|
|
42
50
|
# pdfprep
|
|
43
51
|
|
|
@@ -65,6 +73,20 @@ pip install "pdfprep[table]"
|
|
|
65
73
|
pip install "pdfprep[all]"
|
|
66
74
|
```
|
|
67
75
|
|
|
76
|
+
> **Python 버전별 지원 범위 (Windows 포함)**
|
|
77
|
+
>
|
|
78
|
+
> | 기능 | Python 3.12 | Python 3.13 / 3.14 |
|
|
79
|
+
> | --- | --- | --- |
|
|
80
|
+
> | 기본 (메타데이터 · 파싱) | ✅ | ✅ |
|
|
81
|
+
> | OCR — tesseract | ✅ | ✅ |
|
|
82
|
+
> | OCR — paddleocr | ✅ | ⚠️ 설치 제외 |
|
|
83
|
+
> | 표 — camelot · tabula | ✅ | ✅ |
|
|
84
|
+
> | 표 — layoutparser | ✅ | ⚠️ 설치 제외 |
|
|
85
|
+
>
|
|
86
|
+
> `paddleocr`/`paddlepaddle`/`layoutparser` 는 아직 Python 3.13+ 휠을 제공하지 않아,
|
|
87
|
+
> 3.13/3.14 환경에서는 `pip install` 시 자동으로 제외됩니다 (설치는 정상 완료).
|
|
88
|
+
> 해당 엔진을 호출하면 안내 메시지와 함께 건너뜁니다. paddle 계열이 필요하면 Python 3.12 를 사용하세요.
|
|
89
|
+
|
|
68
90
|
### 시스템 패키지 (해당 기능을 쓸 때만)
|
|
69
91
|
|
|
70
92
|
| 기능 | 패키지 | 설치 |
|
|
@@ -24,6 +24,20 @@ pip install "pdfprep[table]"
|
|
|
24
24
|
pip install "pdfprep[all]"
|
|
25
25
|
```
|
|
26
26
|
|
|
27
|
+
> **Python 버전별 지원 범위 (Windows 포함)**
|
|
28
|
+
>
|
|
29
|
+
> | 기능 | Python 3.12 | Python 3.13 / 3.14 |
|
|
30
|
+
> | --- | --- | --- |
|
|
31
|
+
> | 기본 (메타데이터 · 파싱) | ✅ | ✅ |
|
|
32
|
+
> | OCR — tesseract | ✅ | ✅ |
|
|
33
|
+
> | OCR — paddleocr | ✅ | ⚠️ 설치 제외 |
|
|
34
|
+
> | 표 — camelot · tabula | ✅ | ✅ |
|
|
35
|
+
> | 표 — layoutparser | ✅ | ⚠️ 설치 제외 |
|
|
36
|
+
>
|
|
37
|
+
> `paddleocr`/`paddlepaddle`/`layoutparser` 는 아직 Python 3.13+ 휠을 제공하지 않아,
|
|
38
|
+
> 3.13/3.14 환경에서는 `pip install` 시 자동으로 제외됩니다 (설치는 정상 완료).
|
|
39
|
+
> 해당 엔진을 호출하면 안내 메시지와 함께 건너뜁니다. paddle 계열이 필요하면 Python 3.12 를 사용하세요.
|
|
40
|
+
|
|
27
41
|
### 시스템 패키지 (해당 기능을 쓸 때만)
|
|
28
42
|
|
|
29
43
|
| 기능 | 패키지 | 설치 |
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=65", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "pdfprep"
|
|
7
|
+
version = "0.2.0"
|
|
8
|
+
description = "PDF 전처리 통합 도구 — 메타데이터, 텍스트 파싱, OCR, 표 추출"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.12"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "uwpark", email = "uwpark@simplatform.com" }]
|
|
13
|
+
keywords = ["pdf", "ocr", "table-extraction", "preprocessing", "parsing", "metadata"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"Programming Language :: Python :: 3.12",
|
|
17
|
+
"Programming Language :: Python :: 3.13",
|
|
18
|
+
"Programming Language :: Python :: 3.14",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Operating System :: OS Independent",
|
|
21
|
+
"Topic :: Text Processing",
|
|
22
|
+
"Topic :: Scientific/Engineering :: Image Recognition",
|
|
23
|
+
"Natural Language :: Korean",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
# 핵심 의존성: pypdf / pdfplumber / pymupdf / pillow 만 사용.
|
|
27
|
+
# (metadata, parsing 모듈은 numpy 없이 동작 — numpy 는 OCR/표 extra 에서만 필요)
|
|
28
|
+
# 모두 Python 3.12~3.14 (Windows 포함) 휠을 제공하므로 base 설치가 전 버전에서 동작.
|
|
29
|
+
dependencies = [
|
|
30
|
+
"pypdf>=4.0,<6.0",
|
|
31
|
+
"pdfplumber>=0.11.0",
|
|
32
|
+
"pymupdf>=1.27.0",
|
|
33
|
+
"pillow>=10.0",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
# numpy 정책:
|
|
37
|
+
# Python 3.13+ → numpy 2.x (3.14 등 신규 인터프리터 휠 제공, camelot/tabula 호환)
|
|
38
|
+
# Python 3.12 → numpy 1.x (paddlepaddle 2.6.2 / layoutparser 가 numpy 1.x ABI 로 빌드됨)
|
|
39
|
+
# paddlepaddle 2.6.2 / paddleocr / layoutparser / jpype1 은 Python 3.13+ 휠이 없어
|
|
40
|
+
# python_full_version < '3.13' 마커로 분기 → 3.13/3.14 에서는 설치 대상에서 자동 제외된다.
|
|
41
|
+
# → Python 3.13/3.14 에서는 base + tesseract OCR + camelot/tabula 표 추출이 numpy 2.x 로 동작하고,
|
|
42
|
+
# paddleOCR / layoutparser 는 제외된다 (해당 엔진 호출 시 런타임 안내 메시지 출력).
|
|
43
|
+
[project.optional-dependencies]
|
|
44
|
+
ocr = [
|
|
45
|
+
"pytesseract>=0.3.13",
|
|
46
|
+
"paddleocr==2.7.3 ; python_full_version < '3.13'",
|
|
47
|
+
"paddlepaddle==2.6.2 ; python_full_version < '3.13'",
|
|
48
|
+
# paddleocr 2.7.3 / paddlepaddle 2.6.2 는 numpy 1.x ABI 로 빌드됨 (3.12 한정)
|
|
49
|
+
"numpy<2 ; python_full_version < '3.13'",
|
|
50
|
+
"numpy>=2,<3 ; python_full_version >= '3.13'",
|
|
51
|
+
# paddlepaddle 2.6.2 가 distutils 대체로 setuptools 필요 (Python 3.12+ stdlib 에서 제거됨)
|
|
52
|
+
"setuptools>=65 ; python_full_version < '3.13'",
|
|
53
|
+
]
|
|
54
|
+
table = [
|
|
55
|
+
# camelot 1.0.9 / tabula-py 는 numpy 상한이 없어 numpy 2.x 와 호환 → 3.14 동작
|
|
56
|
+
"camelot-py==1.0.9",
|
|
57
|
+
"tabula-py>=2.10",
|
|
58
|
+
# jpype1 은 in-process Java 호출용(선택). 3.13+ 휠 부재 → tabula 는 java subprocess 로 대체 동작
|
|
59
|
+
"jpype1>=1.5 ; python_full_version < '3.13'",
|
|
60
|
+
# layoutparser 는 numpy 1.x + paddle 백엔드에 묶여 있어 <3.13 에서만
|
|
61
|
+
"layoutparser>=0.3.4 ; python_full_version < '3.13'",
|
|
62
|
+
"numpy<2 ; python_full_version < '3.13'",
|
|
63
|
+
"numpy>=2,<3 ; python_full_version >= '3.13'",
|
|
64
|
+
]
|
|
65
|
+
all = [
|
|
66
|
+
"pytesseract>=0.3.13",
|
|
67
|
+
"paddleocr==2.7.3 ; python_full_version < '3.13'",
|
|
68
|
+
"paddlepaddle==2.6.2 ; python_full_version < '3.13'",
|
|
69
|
+
"camelot-py==1.0.9",
|
|
70
|
+
"tabula-py>=2.10",
|
|
71
|
+
"jpype1>=1.5 ; python_full_version < '3.13'",
|
|
72
|
+
"layoutparser>=0.3.4 ; python_full_version < '3.13'",
|
|
73
|
+
"numpy<2 ; python_full_version < '3.13'",
|
|
74
|
+
"numpy>=2,<3 ; python_full_version >= '3.13'",
|
|
75
|
+
"setuptools>=65 ; python_full_version < '3.13'",
|
|
76
|
+
]
|
|
77
|
+
|
|
78
|
+
[project.urls]
|
|
79
|
+
Homepage = "https://pypi.org/project/pdfprep/"
|
|
80
|
+
|
|
81
|
+
[project.scripts]
|
|
82
|
+
pdfprep-metadata = "pdfprep.metadata:main"
|
|
83
|
+
pdfprep-parse = "pdfprep.parsing:main"
|
|
84
|
+
pdfprep-ocr = "pdfprep.ocr:main"
|
|
85
|
+
pdfprep-table = "pdfprep.table:main"
|
|
86
|
+
|
|
87
|
+
[tool.setuptools.packages.find]
|
|
88
|
+
where = ["src"]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pdfprep
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: PDF 전처리 통합 도구 — 메타데이터, 텍스트 파싱, OCR, 표 추출
|
|
5
5
|
Author-email: uwpark <uwpark@simplatform.com>
|
|
6
6
|
License: MIT
|
|
@@ -8,8 +8,10 @@ Project-URL: Homepage, https://pypi.org/project/pdfprep/
|
|
|
8
8
|
Keywords: pdf,ocr,table-extraction,preprocessing,parsing,metadata
|
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
|
10
10
|
Classifier: Programming Language :: Python :: 3.12
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
11
13
|
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
-
Classifier: Operating System :: POSIX :: Linux
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
13
15
|
Classifier: Topic :: Text Processing
|
|
14
16
|
Classifier: Topic :: Scientific/Engineering :: Image Recognition
|
|
15
17
|
Classifier: Natural Language :: Korean
|
|
@@ -19,25 +21,31 @@ Requires-Dist: pypdf<6.0,>=4.0
|
|
|
19
21
|
Requires-Dist: pdfplumber>=0.11.0
|
|
20
22
|
Requires-Dist: pymupdf>=1.27.0
|
|
21
23
|
Requires-Dist: pillow>=10.0
|
|
22
|
-
Requires-Dist: numpy<2
|
|
23
|
-
Requires-Dist: setuptools>=65
|
|
24
24
|
Provides-Extra: ocr
|
|
25
25
|
Requires-Dist: pytesseract>=0.3.13; extra == "ocr"
|
|
26
|
-
Requires-Dist: paddleocr==2.7.3; extra == "ocr"
|
|
27
|
-
Requires-Dist: paddlepaddle==2.6.2; extra == "ocr"
|
|
26
|
+
Requires-Dist: paddleocr==2.7.3; python_full_version < "3.13" and extra == "ocr"
|
|
27
|
+
Requires-Dist: paddlepaddle==2.6.2; python_full_version < "3.13" and extra == "ocr"
|
|
28
|
+
Requires-Dist: numpy<2; python_full_version < "3.13" and extra == "ocr"
|
|
29
|
+
Requires-Dist: numpy<3,>=2; python_full_version >= "3.13" and extra == "ocr"
|
|
30
|
+
Requires-Dist: setuptools>=65; python_full_version < "3.13" and extra == "ocr"
|
|
28
31
|
Provides-Extra: table
|
|
29
|
-
Requires-Dist: camelot-py[base]==1.0.9; extra == "table"
|
|
32
|
+
Requires-Dist: camelot-py==1.0.9; extra == "table"
|
|
30
33
|
Requires-Dist: tabula-py>=2.10; extra == "table"
|
|
31
|
-
Requires-Dist: jpype1>=1.5; extra == "table"
|
|
32
|
-
Requires-Dist: layoutparser>=0.3.4; extra == "table"
|
|
34
|
+
Requires-Dist: jpype1>=1.5; python_full_version < "3.13" and extra == "table"
|
|
35
|
+
Requires-Dist: layoutparser>=0.3.4; python_full_version < "3.13" and extra == "table"
|
|
36
|
+
Requires-Dist: numpy<2; python_full_version < "3.13" and extra == "table"
|
|
37
|
+
Requires-Dist: numpy<3,>=2; python_full_version >= "3.13" and extra == "table"
|
|
33
38
|
Provides-Extra: all
|
|
34
39
|
Requires-Dist: pytesseract>=0.3.13; extra == "all"
|
|
35
|
-
Requires-Dist: paddleocr==2.7.3; extra == "all"
|
|
36
|
-
Requires-Dist: paddlepaddle==2.6.2; extra == "all"
|
|
37
|
-
Requires-Dist: camelot-py[base]==1.0.9; extra == "all"
|
|
40
|
+
Requires-Dist: paddleocr==2.7.3; python_full_version < "3.13" and extra == "all"
|
|
41
|
+
Requires-Dist: paddlepaddle==2.6.2; python_full_version < "3.13" and extra == "all"
|
|
42
|
+
Requires-Dist: camelot-py==1.0.9; extra == "all"
|
|
38
43
|
Requires-Dist: tabula-py>=2.10; extra == "all"
|
|
39
|
-
Requires-Dist: jpype1>=1.5; extra == "all"
|
|
40
|
-
Requires-Dist: layoutparser>=0.3.4; extra == "all"
|
|
44
|
+
Requires-Dist: jpype1>=1.5; python_full_version < "3.13" and extra == "all"
|
|
45
|
+
Requires-Dist: layoutparser>=0.3.4; python_full_version < "3.13" and extra == "all"
|
|
46
|
+
Requires-Dist: numpy<2; python_full_version < "3.13" and extra == "all"
|
|
47
|
+
Requires-Dist: numpy<3,>=2; python_full_version >= "3.13" and extra == "all"
|
|
48
|
+
Requires-Dist: setuptools>=65; python_full_version < "3.13" and extra == "all"
|
|
41
49
|
|
|
42
50
|
# pdfprep
|
|
43
51
|
|
|
@@ -65,6 +73,20 @@ pip install "pdfprep[table]"
|
|
|
65
73
|
pip install "pdfprep[all]"
|
|
66
74
|
```
|
|
67
75
|
|
|
76
|
+
> **Python 버전별 지원 범위 (Windows 포함)**
|
|
77
|
+
>
|
|
78
|
+
> | 기능 | Python 3.12 | Python 3.13 / 3.14 |
|
|
79
|
+
> | --- | --- | --- |
|
|
80
|
+
> | 기본 (메타데이터 · 파싱) | ✅ | ✅ |
|
|
81
|
+
> | OCR — tesseract | ✅ | ✅ |
|
|
82
|
+
> | OCR — paddleocr | ✅ | ⚠️ 설치 제외 |
|
|
83
|
+
> | 표 — camelot · tabula | ✅ | ✅ |
|
|
84
|
+
> | 표 — layoutparser | ✅ | ⚠️ 설치 제외 |
|
|
85
|
+
>
|
|
86
|
+
> `paddleocr`/`paddlepaddle`/`layoutparser` 는 아직 Python 3.13+ 휠을 제공하지 않아,
|
|
87
|
+
> 3.13/3.14 환경에서는 `pip install` 시 자동으로 제외됩니다 (설치는 정상 완료).
|
|
88
|
+
> 해당 엔진을 호출하면 안내 메시지와 함께 건너뜁니다. paddle 계열이 필요하면 Python 3.12 를 사용하세요.
|
|
89
|
+
|
|
68
90
|
### 시스템 패키지 (해당 기능을 쓸 때만)
|
|
69
91
|
|
|
70
92
|
| 기능 | 패키지 | 설치 |
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
pypdf<6.0,>=4.0
|
|
2
|
+
pdfplumber>=0.11.0
|
|
3
|
+
pymupdf>=1.27.0
|
|
4
|
+
pillow>=10.0
|
|
5
|
+
|
|
6
|
+
[all]
|
|
7
|
+
pytesseract>=0.3.13
|
|
8
|
+
camelot-py==1.0.9
|
|
9
|
+
tabula-py>=2.10
|
|
10
|
+
|
|
11
|
+
[all:python_full_version < "3.13"]
|
|
12
|
+
paddleocr==2.7.3
|
|
13
|
+
paddlepaddle==2.6.2
|
|
14
|
+
jpype1>=1.5
|
|
15
|
+
layoutparser>=0.3.4
|
|
16
|
+
numpy<2
|
|
17
|
+
setuptools>=65
|
|
18
|
+
|
|
19
|
+
[all:python_full_version >= "3.13"]
|
|
20
|
+
numpy<3,>=2
|
|
21
|
+
|
|
22
|
+
[ocr]
|
|
23
|
+
pytesseract>=0.3.13
|
|
24
|
+
|
|
25
|
+
[ocr:python_full_version < "3.13"]
|
|
26
|
+
paddleocr==2.7.3
|
|
27
|
+
paddlepaddle==2.6.2
|
|
28
|
+
numpy<2
|
|
29
|
+
setuptools>=65
|
|
30
|
+
|
|
31
|
+
[ocr:python_full_version >= "3.13"]
|
|
32
|
+
numpy<3,>=2
|
|
33
|
+
|
|
34
|
+
[table]
|
|
35
|
+
camelot-py==1.0.9
|
|
36
|
+
tabula-py>=2.10
|
|
37
|
+
|
|
38
|
+
[table:python_full_version < "3.13"]
|
|
39
|
+
jpype1>=1.5
|
|
40
|
+
layoutparser>=0.3.4
|
|
41
|
+
numpy<2
|
|
42
|
+
|
|
43
|
+
[table:python_full_version >= "3.13"]
|
|
44
|
+
numpy<3,>=2
|
pdfprep-0.1.2/pyproject.toml
DELETED
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
[build-system]
|
|
2
|
-
requires = ["setuptools>=65", "wheel"]
|
|
3
|
-
build-backend = "setuptools.build_meta"
|
|
4
|
-
|
|
5
|
-
[project]
|
|
6
|
-
name = "pdfprep"
|
|
7
|
-
version = "0.1.2"
|
|
8
|
-
description = "PDF 전처리 통합 도구 — 메타데이터, 텍스트 파싱, OCR, 표 추출"
|
|
9
|
-
readme = "README.md"
|
|
10
|
-
requires-python = ">=3.12"
|
|
11
|
-
license = { text = "MIT" }
|
|
12
|
-
authors = [{ name = "uwpark", email = "uwpark@simplatform.com" }]
|
|
13
|
-
keywords = ["pdf", "ocr", "table-extraction", "preprocessing", "parsing", "metadata"]
|
|
14
|
-
classifiers = [
|
|
15
|
-
"Programming Language :: Python :: 3",
|
|
16
|
-
"Programming Language :: Python :: 3.12",
|
|
17
|
-
"License :: OSI Approved :: MIT License",
|
|
18
|
-
"Operating System :: POSIX :: Linux",
|
|
19
|
-
"Topic :: Text Processing",
|
|
20
|
-
"Topic :: Scientific/Engineering :: Image Recognition",
|
|
21
|
-
"Natural Language :: Korean",
|
|
22
|
-
]
|
|
23
|
-
|
|
24
|
-
dependencies = [
|
|
25
|
-
"pypdf>=4.0,<6.0",
|
|
26
|
-
"pdfplumber>=0.11.0",
|
|
27
|
-
"pymupdf>=1.27.0",
|
|
28
|
-
"pillow>=10.0",
|
|
29
|
-
"numpy<2",
|
|
30
|
-
"setuptools>=65",
|
|
31
|
-
]
|
|
32
|
-
|
|
33
|
-
[project.optional-dependencies]
|
|
34
|
-
ocr = [
|
|
35
|
-
"pytesseract>=0.3.13",
|
|
36
|
-
"paddleocr==2.7.3",
|
|
37
|
-
"paddlepaddle==2.6.2",
|
|
38
|
-
]
|
|
39
|
-
table = [
|
|
40
|
-
"camelot-py[base]==1.0.9",
|
|
41
|
-
"tabula-py>=2.10",
|
|
42
|
-
"jpype1>=1.5",
|
|
43
|
-
"layoutparser>=0.3.4",
|
|
44
|
-
]
|
|
45
|
-
all = [
|
|
46
|
-
"pytesseract>=0.3.13",
|
|
47
|
-
"paddleocr==2.7.3",
|
|
48
|
-
"paddlepaddle==2.6.2",
|
|
49
|
-
"camelot-py[base]==1.0.9",
|
|
50
|
-
"tabula-py>=2.10",
|
|
51
|
-
"jpype1>=1.5",
|
|
52
|
-
"layoutparser>=0.3.4",
|
|
53
|
-
]
|
|
54
|
-
|
|
55
|
-
[project.urls]
|
|
56
|
-
Homepage = "https://pypi.org/project/pdfprep/"
|
|
57
|
-
|
|
58
|
-
[project.scripts]
|
|
59
|
-
pdfprep-metadata = "pdfprep.metadata:main"
|
|
60
|
-
pdfprep-parse = "pdfprep.parsing:main"
|
|
61
|
-
pdfprep-ocr = "pdfprep.ocr:main"
|
|
62
|
-
pdfprep-table = "pdfprep.table:main"
|
|
63
|
-
|
|
64
|
-
[tool.setuptools.packages.find]
|
|
65
|
-
where = ["src"]
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
pypdf<6.0,>=4.0
|
|
2
|
-
pdfplumber>=0.11.0
|
|
3
|
-
pymupdf>=1.27.0
|
|
4
|
-
pillow>=10.0
|
|
5
|
-
numpy<2
|
|
6
|
-
setuptools>=65
|
|
7
|
-
|
|
8
|
-
[all]
|
|
9
|
-
pytesseract>=0.3.13
|
|
10
|
-
paddleocr==2.7.3
|
|
11
|
-
paddlepaddle==2.6.2
|
|
12
|
-
camelot-py[base]==1.0.9
|
|
13
|
-
tabula-py>=2.10
|
|
14
|
-
jpype1>=1.5
|
|
15
|
-
layoutparser>=0.3.4
|
|
16
|
-
|
|
17
|
-
[ocr]
|
|
18
|
-
pytesseract>=0.3.13
|
|
19
|
-
paddleocr==2.7.3
|
|
20
|
-
paddlepaddle==2.6.2
|
|
21
|
-
|
|
22
|
-
[table]
|
|
23
|
-
camelot-py[base]==1.0.9
|
|
24
|
-
tabula-py>=2.10
|
|
25
|
-
jpype1>=1.5
|
|
26
|
-
layoutparser>=0.3.4
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|