contentintelpy 0.1.0__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/PKG-INFO +2 -1
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/nodes/keyword_extract_node.py +5 -2
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/nodes/summarization_node.py +8 -6
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/nodes/translation_node.py +5 -2
- contentintelpy-0.1.1/contentintelpy/utils/lazy_import.py +16 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/utils/model_registry.py +9 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy.egg-info/PKG-INFO +2 -1
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy.egg-info/SOURCES.txt +1 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy.egg-info/requires.txt +1 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/pyproject.toml +4 -3
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/LICENSE +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/README.md +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/__init__.py +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/nodes/classification_node.py +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/nodes/language_node.py +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/nodes/location_node.py +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/nodes/ner_node.py +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/nodes/sentiment_node.py +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/pipeline/base_node.py +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/pipeline/context.py +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/pipeline/pipeline.py +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/services/ner_service.py +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/services/sentiment_service.py +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/services/summarization_service.py +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/services/translation_service.py +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy.egg-info/dependency_links.txt +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy.egg-info/top_level.txt +0 -0
- {contentintelpy-0.1.0 → contentintelpy-0.1.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: contentintelpy
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.1
|
|
4
4
|
Summary: Production-grade NLP library for unified content intelligence.
|
|
5
5
|
Author-email: Ronit Fulari <ronitfulari31@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -16,6 +16,7 @@ Provides-Extra: core
|
|
|
16
16
|
Requires-Dist: transformers<5.0.0,>=4.30.0; extra == "core"
|
|
17
17
|
Requires-Dist: torch<3.0.0,>=2.0.0; extra == "core"
|
|
18
18
|
Requires-Dist: sentence-transformers>=2.2.0; extra == "core"
|
|
19
|
+
Requires-Dist: scikit-learn>=1.0.0; extra == "core"
|
|
19
20
|
Provides-Extra: ner
|
|
20
21
|
Requires-Dist: spacy>=3.7.0; extra == "ner"
|
|
21
22
|
Requires-Dist: gliner>=0.1.0; extra == "ner"
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
from ..pipeline.base_node import Node
|
|
2
2
|
from ..pipeline.context import PipelineContext
|
|
3
3
|
from ..utils.model_registry import registry
|
|
4
|
-
from sklearn.metrics.pairwise import cosine_similarity
|
|
5
|
-
from sklearn.feature_extraction.text import CountVectorizer
|
|
4
|
+
from ..utils.lazy_import import ensure_dependency
|
|
6
5
|
import numpy as np
|
|
7
6
|
import logging
|
|
8
7
|
import itertools
|
|
@@ -30,6 +29,10 @@ class KeywordExtractionNode(Node):
|
|
|
30
29
|
return context
|
|
31
30
|
|
|
32
31
|
try:
|
|
32
|
+
ensure_dependency("sklearn", "core")
|
|
33
|
+
from sklearn.metrics.pairwise import cosine_similarity
|
|
34
|
+
from sklearn.feature_extraction.text import CountVectorizer
|
|
35
|
+
|
|
33
36
|
model = registry.get_embedding_model()
|
|
34
37
|
|
|
35
38
|
# 1. Candidate Generation (using simple CountVectorizer)
|
|
@@ -3,12 +3,7 @@ from ..pipeline.context import PipelineContext
|
|
|
3
3
|
from ..utils.model_registry import registry
|
|
4
4
|
import logging
|
|
5
5
|
|
|
6
|
-
|
|
7
|
-
from sumy.parsers.plaintext import PlaintextParser
|
|
8
|
-
from sumy.nlp.tokenizers import Tokenizer
|
|
9
|
-
from sumy.summarizers.lsa import LsaSummarizer as Summarizer
|
|
10
|
-
from sumy.nlp.stemmers import Stemmer
|
|
11
|
-
from sumy.utils import get_stop_words
|
|
6
|
+
from ..utils.lazy_import import ensure_dependency
|
|
12
7
|
|
|
13
8
|
logger = logging.getLogger("contentintelpy.nodes.summarization")
|
|
14
9
|
|
|
@@ -48,6 +43,13 @@ class SummarizationNode(Node):
|
|
|
48
43
|
# 2. Fallback: Sumy (LSA)
|
|
49
44
|
if not summary_text:
|
|
50
45
|
try:
|
|
46
|
+
ensure_dependency("sumy", "summarization")
|
|
47
|
+
from sumy.parsers.plaintext import PlaintextParser
|
|
48
|
+
from sumy.nlp.tokenizers import Tokenizer
|
|
49
|
+
from sumy.summarizers.lsa import LsaSummarizer as Summarizer
|
|
50
|
+
from sumy.nlp.stemmers import Stemmer
|
|
51
|
+
from sumy.utils import get_stop_words
|
|
52
|
+
|
|
51
53
|
parser = PlaintextParser.from_string(text, Tokenizer("english"))
|
|
52
54
|
stemmer = Stemmer("english")
|
|
53
55
|
summarizer = Summarizer(stemmer)
|
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
from ..pipeline.base_node import Node
|
|
2
2
|
from ..pipeline.context import PipelineContext
|
|
3
3
|
from ..utils.model_registry import registry
|
|
4
|
+
from ..utils.lazy_import import ensure_dependency
|
|
4
5
|
import logging
|
|
5
|
-
import argostranslate.package
|
|
6
|
-
import argostranslate.translate
|
|
7
6
|
|
|
8
7
|
logger = logging.getLogger("contentintelpy.nodes.translation")
|
|
9
8
|
|
|
@@ -59,6 +58,10 @@ class TranslationNode(Node):
|
|
|
59
58
|
# 3. Fallback: ArgosTranslate (Offline)
|
|
60
59
|
if not translated_text:
|
|
61
60
|
try:
|
|
61
|
+
argostranslate = ensure_dependency("argostranslate", "translation")
|
|
62
|
+
import argostranslate.package
|
|
63
|
+
import argostranslate.translate
|
|
64
|
+
|
|
62
65
|
logger.info("Attempting translation with ArgosTranslate...")
|
|
63
66
|
# Argos requires ensuring packages are installed
|
|
64
67
|
# This is a blocking network call on first run if not present
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import importlib
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
logger = logging.getLogger("contentintelpy.utils")
|
|
5
|
+
|
|
6
|
+
def ensure_dependency(module_name: str, extra_name: str):
|
|
7
|
+
"""
|
|
8
|
+
Attempts to import a module. If it fails, raises an ImportError with a
|
|
9
|
+
helpful message directing the user to install the appropriate optional extra.
|
|
10
|
+
"""
|
|
11
|
+
try:
|
|
12
|
+
return importlib.import_module(module_name)
|
|
13
|
+
except (ImportError, ModuleNotFoundError):
|
|
14
|
+
msg = f"Missing optional dependency '{module_name}'. To use this feature, please install it using: pip install \"contentintelpy[{extra_name}]\""
|
|
15
|
+
logger.error(msg)
|
|
16
|
+
raise ImportError(msg) from None
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from .lazy_import import ensure_dependency
|
|
1
2
|
import logging
|
|
2
3
|
import threading
|
|
3
4
|
from typing import Any, Optional
|
|
@@ -50,6 +51,8 @@ class ModelRegistry:
|
|
|
50
51
|
# --------------------------------------------------------------------------
|
|
51
52
|
def get_sentiment_pipeline(self):
|
|
52
53
|
def _loader():
|
|
54
|
+
ensure_dependency("transformers", "core")
|
|
55
|
+
ensure_dependency("torch", "core")
|
|
53
56
|
from transformers import pipeline
|
|
54
57
|
# Use a high-quality multilingual or English sentiment model
|
|
55
58
|
# CardiffNLP is standard for Twitter-like text, widely used
|
|
@@ -63,6 +66,7 @@ class ModelRegistry:
|
|
|
63
66
|
# --------------------------------------------------------------------------
|
|
64
67
|
def get_translation_pipeline(self):
|
|
65
68
|
def _loader():
|
|
69
|
+
ensure_dependency("transformers", "core")
|
|
66
70
|
from transformers import pipeline
|
|
67
71
|
# NLLB-200 Distilled (600M) is a good balance of size/quality
|
|
68
72
|
model_name = "facebook/nllb-200-distilled-600M"
|
|
@@ -75,6 +79,7 @@ class ModelRegistry:
|
|
|
75
79
|
# --------------------------------------------------------------------------
|
|
76
80
|
def get_gliner_model(self):
|
|
77
81
|
def _loader():
|
|
82
|
+
ensure_dependency("gliner", "ner")
|
|
78
83
|
from gliner import GLiNER
|
|
79
84
|
# Standard GLiNER model
|
|
80
85
|
return GLiNER.from_pretrained("urchade/gliner_large-v2.1")
|
|
@@ -86,6 +91,7 @@ class ModelRegistry:
|
|
|
86
91
|
# --------------------------------------------------------------------------
|
|
87
92
|
def get_classifier_pipeline(self):
|
|
88
93
|
def _loader():
|
|
94
|
+
ensure_dependency("transformers", "core")
|
|
89
95
|
from transformers import pipeline
|
|
90
96
|
return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
|
|
91
97
|
|
|
@@ -96,6 +102,7 @@ class ModelRegistry:
|
|
|
96
102
|
# --------------------------------------------------------------------------
|
|
97
103
|
def get_summarization_pipeline(self):
|
|
98
104
|
def _loader():
|
|
105
|
+
ensure_dependency("transformers", "core")
|
|
99
106
|
from transformers import pipeline
|
|
100
107
|
return pipeline("summarization", model="facebook/bart-large-cnn")
|
|
101
108
|
|
|
@@ -106,6 +113,7 @@ class ModelRegistry:
|
|
|
106
113
|
# --------------------------------------------------------------------------
|
|
107
114
|
def get_language_detector(self):
|
|
108
115
|
def _loader():
|
|
116
|
+
ensure_dependency("transformers", "core")
|
|
109
117
|
from transformers import pipeline
|
|
110
118
|
return pipeline("text-classification", model="qanastek/51-languages-classifier")
|
|
111
119
|
|
|
@@ -116,6 +124,7 @@ class ModelRegistry:
|
|
|
116
124
|
# --------------------------------------------------------------------------
|
|
117
125
|
def get_embedding_model(self):
|
|
118
126
|
def _loader():
|
|
127
|
+
ensure_dependency("sentence_transformers", "core")
|
|
119
128
|
from sentence_transformers import SentenceTransformer
|
|
120
129
|
# Fast, effective embedding model for semantic similarity
|
|
121
130
|
return SentenceTransformer('all-MiniLM-L6-v2')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: contentintelpy
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.1
|
|
4
4
|
Summary: Production-grade NLP library for unified content intelligence.
|
|
5
5
|
Author-email: Ronit Fulari <ronitfulari31@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -16,6 +16,7 @@ Provides-Extra: core
|
|
|
16
16
|
Requires-Dist: transformers<5.0.0,>=4.30.0; extra == "core"
|
|
17
17
|
Requires-Dist: torch<3.0.0,>=2.0.0; extra == "core"
|
|
18
18
|
Requires-Dist: sentence-transformers>=2.2.0; extra == "core"
|
|
19
|
+
Requires-Dist: scikit-learn>=1.0.0; extra == "core"
|
|
19
20
|
Provides-Extra: ner
|
|
20
21
|
Requires-Dist: spacy>=3.7.0; extra == "ner"
|
|
21
22
|
Requires-Dist: gliner>=0.1.0; extra == "ner"
|
|
@@ -22,4 +22,5 @@ contentintelpy/services/ner_service.py
|
|
|
22
22
|
contentintelpy/services/sentiment_service.py
|
|
23
23
|
contentintelpy/services/summarization_service.py
|
|
24
24
|
contentintelpy/services/translation_service.py
|
|
25
|
+
contentintelpy/utils/lazy_import.py
|
|
25
26
|
contentintelpy/utils/model_registry.py
|
|
@@ -4,10 +4,10 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "contentintelpy"
|
|
7
|
-
version = "0.1.0"
|
|
7
|
+
version = "0.1.1"
|
|
8
8
|
description = "Production-grade NLP library for unified content intelligence."
|
|
9
9
|
authors = [
|
|
10
|
-
|
|
10
|
+
{ name = "Ronit Fulari", email = "ronitfulari31@gmail.com" },
|
|
11
11
|
]
|
|
12
12
|
license = { text = "MIT" }
|
|
13
13
|
readme = "README.md"
|
|
@@ -26,7 +26,8 @@ dependencies = [
|
|
|
26
26
|
core = [
|
|
27
27
|
"transformers>=4.30.0,<5.0.0",
|
|
28
28
|
"torch>=2.0.0,<3.0.0",
|
|
29
|
-
"sentence-transformers>=2.2.0"
|
|
29
|
+
"sentence-transformers>=2.2.0",
|
|
30
|
+
"scikit-learn>=1.0.0"
|
|
30
31
|
]
|
|
31
32
|
|
|
32
33
|
ner = [
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/services/summarization_service.py
RENAMED
|
File without changes
|
{contentintelpy-0.1.0 → contentintelpy-0.1.1}/contentintelpy/services/translation_service.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|