cat-stack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cat_stack/__about__.py +10 -0
- cat_stack/__init__.py +128 -0
- cat_stack/_batch.py +1388 -0
- cat_stack/_category_analysis.py +348 -0
- cat_stack/_chunked.py +424 -0
- cat_stack/_embeddings.py +189 -0
- cat_stack/_formatter.py +169 -0
- cat_stack/_providers.py +1048 -0
- cat_stack/_tiebreaker.py +277 -0
- cat_stack/_utils.py +512 -0
- cat_stack/_web_fetch.py +194 -0
- cat_stack/calls/CoVe.py +287 -0
- cat_stack/calls/__init__.py +25 -0
- cat_stack/calls/all_calls.py +622 -0
- cat_stack/calls/image_CoVe.py +386 -0
- cat_stack/calls/image_stepback.py +210 -0
- cat_stack/calls/pdf_CoVe.py +386 -0
- cat_stack/calls/pdf_stepback.py +210 -0
- cat_stack/calls/stepback.py +180 -0
- cat_stack/calls/top_n.py +217 -0
- cat_stack/classify.py +682 -0
- cat_stack/explore.py +111 -0
- cat_stack/extract.py +218 -0
- cat_stack/image_functions.py +2078 -0
- cat_stack/images/circle.png +0 -0
- cat_stack/images/cube.png +0 -0
- cat_stack/images/diamond.png +0 -0
- cat_stack/images/overlapping_pentagons.png +0 -0
- cat_stack/images/rectangles.png +0 -0
- cat_stack/model_reference_list.py +94 -0
- cat_stack/pdf_functions.py +2087 -0
- cat_stack/summarize.py +290 -0
- cat_stack/text_functions.py +1358 -0
- cat_stack/text_functions_ensemble.py +3644 -0
- cat_stack-0.1.0.dist-info/METADATA +150 -0
- cat_stack-0.1.0.dist-info/RECORD +38 -0
- cat_stack-0.1.0.dist-info/WHEEL +4 -0
- cat_stack-0.1.0.dist-info/licenses/LICENSE +672 -0
cat_stack/__about__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
4
|
+
__version__ = "0.1.0"
|
|
5
|
+
__author__ = "Chris Soria"
|
|
6
|
+
__email__ = "chrissoria@berkeley.edu"
|
|
7
|
+
__title__ = "cat-stack"
|
|
8
|
+
__description__ = "Domain-agnostic text, image, and PDF classification engine powered by LLMs"
|
|
9
|
+
__url__ = "https://github.com/chrissoria/cat-stack"
|
|
10
|
+
__license__ = "GPL-3.0"
|
cat_stack/__init__.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2025-present Christopher Soria <chrissoria@berkeley.edu>
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
4
|
+
|
|
5
|
+
from .__about__ import (
|
|
6
|
+
__version__,
|
|
7
|
+
__author__,
|
|
8
|
+
__description__,
|
|
9
|
+
__title__,
|
|
10
|
+
__url__,
|
|
11
|
+
__license__,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
# =============================================================================
|
|
15
|
+
# Public API - Organized by function type
|
|
16
|
+
# =============================================================================
|
|
17
|
+
|
|
18
|
+
# Main entry points
|
|
19
|
+
from .extract import extract
|
|
20
|
+
from .explore import explore
|
|
21
|
+
from .classify import classify
|
|
22
|
+
from .summarize import summarize
|
|
23
|
+
|
|
24
|
+
# Category analysis
|
|
25
|
+
from ._category_analysis import has_other_category, check_category_verbosity
|
|
26
|
+
|
|
27
|
+
# Web fetching utilities
|
|
28
|
+
from ._web_fetch import is_url, fetch_url_text, fetch_urls, detect_url_input, strip_html_tags
|
|
29
|
+
|
|
30
|
+
# =============================================================================
|
|
31
|
+
# Provider utilities (for advanced users)
|
|
32
|
+
# =============================================================================
|
|
33
|
+
from ._batch import BatchJobExpiredError, BatchJobFailedError
|
|
34
|
+
|
|
35
|
+
from ._providers import (
|
|
36
|
+
UnifiedLLMClient,
|
|
37
|
+
detect_provider,
|
|
38
|
+
set_ollama_endpoint,
|
|
39
|
+
check_ollama_running,
|
|
40
|
+
list_ollama_models,
|
|
41
|
+
check_ollama_model,
|
|
42
|
+
pull_ollama_model,
|
|
43
|
+
PROVIDER_CONFIG,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
# =============================================================================
|
|
47
|
+
# Backward compatibility - Deprecated functions
|
|
48
|
+
# These are kept for backward compatibility but users should migrate to the
|
|
49
|
+
# new unified API (extract, classify, summarize)
|
|
50
|
+
# =============================================================================
|
|
51
|
+
|
|
52
|
+
# Extraction functions (use extract() instead)
|
|
53
|
+
from .extract import (
|
|
54
|
+
explore_common_categories,
|
|
55
|
+
explore_corpus,
|
|
56
|
+
explore_image_categories,
|
|
57
|
+
explore_pdf_categories,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
# Classification functions (use classify() instead)
|
|
61
|
+
from .classify import (
|
|
62
|
+
classify_ensemble,
|
|
63
|
+
multi_class,
|
|
64
|
+
image_multi_class,
|
|
65
|
+
pdf_multi_class,
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
# Summarization functions (use summarize() instead)
|
|
69
|
+
from .summarize import summarize_ensemble
|
|
70
|
+
|
|
71
|
+
# =============================================================================
|
|
72
|
+
# Additional utilities from existing modules (backward compatibility)
|
|
73
|
+
# =============================================================================
|
|
74
|
+
from .text_functions import (
|
|
75
|
+
build_json_schema,
|
|
76
|
+
extract_json,
|
|
77
|
+
validate_classification_json,
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
from .image_functions import (
|
|
81
|
+
image_score_drawing,
|
|
82
|
+
image_features,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
# Define public API
|
|
86
|
+
__all__ = [
|
|
87
|
+
# Batch mode exceptions
|
|
88
|
+
"BatchJobExpiredError",
|
|
89
|
+
"BatchJobFailedError",
|
|
90
|
+
# Main entry points
|
|
91
|
+
"extract",
|
|
92
|
+
"explore",
|
|
93
|
+
"classify",
|
|
94
|
+
"summarize",
|
|
95
|
+
# Category analysis
|
|
96
|
+
"has_other_category",
|
|
97
|
+
"check_category_verbosity",
|
|
98
|
+
# Provider utilities
|
|
99
|
+
"UnifiedLLMClient",
|
|
100
|
+
"detect_provider",
|
|
101
|
+
"set_ollama_endpoint",
|
|
102
|
+
"check_ollama_running",
|
|
103
|
+
"list_ollama_models",
|
|
104
|
+
"check_ollama_model",
|
|
105
|
+
"pull_ollama_model",
|
|
106
|
+
"PROVIDER_CONFIG",
|
|
107
|
+
# Web fetching utilities
|
|
108
|
+
"is_url",
|
|
109
|
+
"fetch_url_text",
|
|
110
|
+
"fetch_urls",
|
|
111
|
+
"detect_url_input",
|
|
112
|
+
"strip_html_tags",
|
|
113
|
+
# Deprecated (backward compatibility)
|
|
114
|
+
"explore_common_categories",
|
|
115
|
+
"explore_corpus",
|
|
116
|
+
"explore_image_categories",
|
|
117
|
+
"explore_pdf_categories",
|
|
118
|
+
"classify_ensemble",
|
|
119
|
+
"summarize_ensemble",
|
|
120
|
+
"multi_class",
|
|
121
|
+
"image_multi_class",
|
|
122
|
+
"pdf_multi_class",
|
|
123
|
+
"image_score_drawing",
|
|
124
|
+
"image_features",
|
|
125
|
+
"build_json_schema",
|
|
126
|
+
"extract_json",
|
|
127
|
+
"validate_classification_json",
|
|
128
|
+
]
|