inconnu 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
inconnu/__init__.py CHANGED
@@ -15,7 +15,7 @@ from .nlp.entity_redactor import EntityRedactor
15
15
  from .nlp.interfaces import NERComponent, ProcessedData
16
16
 
17
17
  # Package version
18
- __version__ = "0.1.0"
18
+ __version__ = "0.1.1"
19
19
 
20
20
  # Export key classes and exceptions for easy importing
21
21
  __all__ = [
@@ -4,8 +4,10 @@ Model installer for Inconnu - downloads spaCy language models.
4
4
  """
5
5
 
6
6
  import argparse
7
+ import os
7
8
  import sys
8
- from subprocess import run
9
+ from pathlib import Path
10
+ from subprocess import CalledProcessError, run
9
11
  from typing import Optional
10
12
 
11
13
  # Mapping of language codes to spaCy model names
@@ -27,8 +29,60 @@ DEFAULT_MODELS = {
27
29
  }
28
30
 
29
31
 
32
+ def is_uv_environment() -> bool:
33
+ """Check if running in a UV environment."""
34
+ # Check for UV environment markers
35
+ return (
36
+ os.environ.get("UV_PROJECT_ROOT") is not None
37
+ or Path(sys.prefix).name == ".venv"
38
+ and Path(sys.prefix).parent.joinpath("uv.lock").exists()
39
+ )
40
+
41
+
42
+ def ensure_pip_available() -> bool:
43
+ """Ensure pip is available, install it if running in UV environment."""
44
+ try:
45
+ # Try importing pip to check if it's available
46
+ import pip # type: ignore # noqa: F401
47
+
48
+ return True
49
+ except ImportError:
50
+ if is_uv_environment():
51
+ print("📦 UV environment detected. Installing pip...")
52
+ try:
53
+ result = run( # noqa: S603
54
+ ["uv", "pip", "install", "pip", "--upgrade"], # noqa: S607
55
+ capture_output=True,
56
+ text=True,
57
+ )
58
+ if result.returncode == 0:
59
+ print("✓ pip installed successfully")
60
+ return True
61
+ else:
62
+ print(f"✗ Failed to install pip: {result.stderr}")
63
+ return False
64
+ except (CalledProcessError, FileNotFoundError) as e:
65
+ print(f"✗ Error installing pip: {e}")
66
+ return False
67
+ return False
68
+
69
+
30
70
  def download_model(model_name: str, upgrade: bool = False) -> bool:
31
71
  """Download a spaCy model using subprocess."""
72
+ # First, ensure pip is available
73
+ if not ensure_pip_available():
74
+ print("\n⚠️ pip is not available and could not be installed.")
75
+ if is_uv_environment():
76
+ print("\n💡 For UV environments, you can install models directly:")
77
+ print(
78
+ f" uv add 'inconnu[{model_name.split('_')[0]}]' # for default model"
79
+ )
80
+ print(
81
+ f" uv add 'inconnu[{model_name.split('_')[0]}-lg]' # for large model"
82
+ )
83
+ print("\n Or install all languages: uv add 'inconnu[all]'")
84
+ return False
85
+
32
86
  try:
33
87
  cmd = [sys.executable, "-m", "spacy", "download", model_name]
34
88
  if upgrade:
@@ -126,12 +180,26 @@ def download_language_models(
126
180
  def download_all_default_models(upgrade: bool = False) -> bool:
127
181
  """Download all default models."""
128
182
  success = True
129
- for lang, model in DEFAULT_MODELS.items():
183
+ for model in DEFAULT_MODELS.values():
130
184
  if not download_model(model, upgrade):
131
185
  success = False
132
186
  return success
133
187
 
134
188
 
189
+ def print_uv_instructions():
190
+ """Print instructions for UV users."""
191
+ print("\n📘 UV Installation Instructions:")
192
+ print("\nFor UV environments, models can be installed as dependencies:")
193
+ print(" uv add 'inconnu[en]' # English")
194
+ print(" uv add 'inconnu[de]' # German")
195
+ print(" uv add 'inconnu[en,de,fr]' # Multiple languages")
196
+ print(" uv add 'inconnu[all]' # All languages")
197
+ print("\n Larger models:")
198
+ print(" uv add 'inconnu[en-lg]' # English large")
199
+ print(" uv add 'inconnu[en-trf]' # English transformer")
200
+ print("\n Available sizes: sm (default), md, lg, trf (English only)")
201
+
202
+
135
203
  def main():
136
204
  """Main CLI entry point."""
137
205
  parser = argparse.ArgumentParser(
@@ -162,9 +230,17 @@ Examples:
162
230
  "--upgrade", action="store_true", help="Upgrade existing models"
163
231
  )
164
232
  parser.add_argument("--list", action="store_true", help="List all available models")
233
+ parser.add_argument(
234
+ "--uv-help", action="store_true", help="Show UV installation instructions"
235
+ )
165
236
 
166
237
  args = parser.parse_args()
167
238
 
239
+ # Handle UV help
240
+ if args.uv_help:
241
+ print_uv_instructions()
242
+ return
243
+
168
244
  # Handle list command
169
245
  if args.list:
170
246
  list_available_models()
@@ -172,6 +248,14 @@ Examples:
172
248
 
173
249
  # Require at least one language if not listing
174
250
  if not args.languages:
251
+ if is_uv_environment():
252
+ print("⚠️ UV environment detected!")
253
+ print_uv_instructions()
254
+ print("\nOr use 'inconnu-download --list' to see available models")
255
+ print(
256
+ "Or use 'inconnu-download LANG' to download via this tool (requires pip)"
257
+ )
258
+ return
175
259
  parser.error("Please specify language(s) to download or use --list")
176
260
 
177
261
  # Handle 'all' keyword
@@ -19,6 +19,8 @@ class SpacyModels(StrEnum):
19
19
  EN_CORE_WEB_TRF = "en_core_web_trf"
20
20
  DE_CORE_NEWS_SM = "de_core_news_sm"
21
21
  IT_CORE_NEWS_SM = "it_core_news_sm"
22
+ ES_CORE_NEWS_SM = "es_core_news_sm"
23
+ FR_CORE_NEWS_SM = "fr_core_news_sm"
22
24
  EN_CORE_WEB_SM = "en_core_web_sm"
23
25
 
24
26
 
@@ -155,6 +157,10 @@ class EntityRedactor:
155
157
  model_name = SpacyModels.EN_CORE_WEB_SM
156
158
  case "it":
157
159
  model_name = SpacyModels.IT_CORE_NEWS_SM
160
+ case "es":
161
+ model_name = SpacyModels.ES_CORE_NEWS_SM
162
+ case "fr":
163
+ model_name = SpacyModels.FR_CORE_NEWS_SM
158
164
  case _:
159
165
  # Default to English small model for unsupported languages
160
166
  model_name = SpacyModels.EN_CORE_WEB_SM
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inconnu
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: GDPR-compliant data privacy tool for entity redaction and de-anonymization
5
5
  Project-URL: Homepage, https://github.com/0xjgv/inconnu
6
6
  Project-URL: Documentation, https://github.com/0xjgv/inconnu#readme
@@ -26,16 +26,14 @@ Classifier: Topic :: Text Processing :: Linguistic
26
26
  Requires-Python: >=3.10
27
27
  Requires-Dist: phonenumbers>=9.0.8
28
28
  Requires-Dist: spacy>=3.8.7
29
- Provides-Extra: all
30
- Provides-Extra: de
31
- Provides-Extra: en
32
- Provides-Extra: es
33
- Provides-Extra: fr
34
- Provides-Extra: it
35
29
  Description-Content-Type: text/markdown
36
30
 
37
31
  # Inconnu
38
32
 
33
+ [![GitHub](https://img.shields.io/github/stars/0xjgv/inconnu)](https://github.com/0xjgv/inconnu)
34
+ [![inconnu.ai](https://img.shields.io/badge/website-inconnu.ai-blue)](https://inconnu.ai)
35
+ [![PyPI](https://img.shields.io/pypi/v/inconnu)](https://pypi.org/project/inconnu/)
36
+
39
37
  ## What is Inconnu?
40
38
 
41
39
  Inconnu is a GDPR-compliant data privacy tool designed for entity redaction and de-anonymization. It provides cutting-edge NLP-based tools for anonymizing and pseudonymizing text data while maintaining data utility, ensuring your business meets stringent privacy regulations.
@@ -62,40 +60,60 @@ Inconnu is a GDPR-compliant data privacy tool designed for entity redaction and
62
60
  ### Install from PyPI
63
61
 
64
62
  ```bash
65
- # Basic installation (without language models)
63
+ # Using pip
66
64
  pip install inconnu
67
65
 
68
- # Install with English language support
69
- pip install inconnu[en]
66
+ # Using UV (Recommended)
67
+ uv add inconnu
68
+ ```
69
+
70
+ **Note**: Language models are NOT included as optional dependencies. You'll need to download them separately using the `inconnu-download` command after installation (see below).
70
71
 
71
- # Install with specific language support
72
- pip install inconnu[de] # German
73
- pip install inconnu[fr] # French
74
- pip install inconnu[es] # Spanish
75
- pip install inconnu[it] # Italian
72
+ ### Download Language Models
73
+
74
+ After installing Inconnu, use the `inconnu-download` command to download spaCy language models:
75
+
76
+ ```bash
77
+ # Download default (small) models
78
+ inconnu-download en # English
79
+ inconnu-download de # German
80
+ inconnu-download en de fr # Multiple languages
81
+ inconnu-download all # All default models
76
82
 
77
- # Install with multiple languages
78
- pip install inconnu[en,de,fr]
83
+ # Download specific model sizes
84
+ inconnu-download en --size large # Large English model
85
+ inconnu-download en --size transformer # Transformer model (English only)
79
86
 
80
- # Install with all language support
81
- pip install inconnu[all]
87
+ # List available models and check what's installed
88
+ inconnu-download --list
89
+
90
+ # Upgrade existing models
91
+ inconnu-download en --upgrade
92
+
93
+ # Get help for UV environments
94
+ inconnu-download --uv-help
82
95
  ```
83
96
 
84
- ### Download Language Models
97
+ #### How Model Installation Works
98
+
99
+ 1. **No Optional Dependencies**: spaCy models are NOT included as pip/uv optional dependencies to avoid unnecessary downloads during dependency resolution
100
+ 2. **On-Demand Downloads**: The `inconnu-download` command downloads only the models you need
101
+ 3. **Smart Environment Detection**: Automatically detects UV environments and provides appropriate guidance
102
+ 4. **Verification**: Checks if models are already installed before downloading
103
+
104
+ #### Available Model Sizes
105
+
106
+ - **Small (sm)**: Default, fast processing, ~15-50MB, good for high-volume
107
+ - **Medium (md)**: Better accuracy, ~50-200MB, moderate speed
108
+ - **Large (lg)**: High accuracy, ~200-600MB, slower processing
109
+ - **Transformer (trf)**: Highest accuracy, ~400MB+, GPU-optimized (English only)
85
110
 
86
- After installation, download the required spaCy models:
111
+ #### Alternative: Direct spaCy Download
87
112
 
113
+ You can also use spaCy directly if preferred:
88
114
  ```bash
89
- # Using the built-in CLI tool
90
- inconnu-download en # Download default English model
91
- inconnu-download de fr # Download German and French models
92
- inconnu-download en --size large # Download large English model
93
- inconnu-download all # Download all default models
94
- inconnu-download --list # List all available models
95
-
96
- # Or using spaCy directly
97
- python -m spacy download en_core_web_sm
98
- python -m spacy download de_core_news_sm
115
+ python -m spacy download en_core_web_sm # English small
116
+ python -m spacy download de_core_news_lg # German large
99
117
  ```
100
118
 
101
119
  ### Install from Source
@@ -108,9 +126,9 @@ python -m spacy download de_core_news_sm
108
126
 
109
127
  2. **Install with UV (recommended for development)**:
110
128
  ```bash
111
- make install # Install dependencies
112
- make model-de # Download German model
113
- make test # Run tests
129
+ uv sync # Install dependencies
130
+ inconnu-download en de # Download language models
131
+ make test # Run tests
114
132
  ```
115
133
 
116
134
  3. **Or install with pip**:
@@ -119,69 +137,55 @@ python -m spacy download de_core_news_sm
119
137
  python -m spacy download en_core_web_sm
120
138
  ```
121
139
 
122
- ### Installing Additional Models
140
+ ### Development Commands
123
141
 
124
- Inconnu supports multiple spaCy models for enhanced accuracy. The default `en_core_web_sm` model is lightweight and fast, but you can install more accurate models:
142
+ For development, the Makefile provides convenience targets:
125
143
 
126
- #### English Models
127
144
  ```bash
128
- # Small model (default) - 15MB, fast processing
129
- uv run python -m spacy download en_core_web_sm
130
-
131
- # Large model - 560MB, higher accuracy
132
- uv run python -m spacy download en_core_web_lg
133
-
134
- # Transformer model - 438MB, highest accuracy
135
- uv run python -m spacy download en_core_web_trf
145
+ # Download models using make commands
146
+ make model-en # English small
147
+ make model-de # German small
148
+ make model-it # Italian small
149
+ make model-es # Spanish small
150
+ make model-fr # French small
151
+
152
+ # Other development commands
153
+ make test # Run tests
154
+ make lint # Check code with ruff
155
+ make format # Format code
156
+ make clean # Clean cache and format code
136
157
  ```
137
158
 
138
- #### Additional Language Models
139
- ```bash
140
- # German model
141
- make model-de
142
- uv run python -m spacy download de_core_news_sm
143
-
144
- # Italian model
145
- make model-it
146
- uv run python -m spacy download it_core_news_sm
159
+ ### Using Different Models in Code
147
160
 
148
- # Spanish model
149
- make model-es
150
- uv run python -m spacy download es_core_news_sm
161
+ To use a different model size, first download it, then specify it when initializing:
151
162
 
152
- # French model
153
- make model-fr
154
- uv run python -m spacy download fr_core_news_sm
155
-
156
- # For enhanced accuracy (manual installation)
157
- # Medium German model - better accuracy
158
- uv run python -m spacy download de_core_news_md
159
-
160
- # Large German model - highest accuracy
161
- uv run python -m spacy download de_core_news_lg
162
- ```
163
-
164
- #### Using Different Models
163
+ ```python
164
+ from inconnu import Inconnu
165
+ from inconnu.nlp.entity_redactor import SpacyModels
165
166
 
166
- To use a different model, specify it when initializing the EntityRedactor:
167
+ # First, download the model you want
168
+ # $ inconnu-download en --size large
167
169
 
168
- ```python
169
- from inconnu.nlp.entity_redactor import EntityRedactor, SpacyModels
170
+ # Then use it in your code
171
+ inconnu = Inconnu(
172
+ language="en",
173
+ model_name=SpacyModels.EN_CORE_WEB_LG # Use large model
174
+ )
170
175
 
171
- # Use transformer model for highest accuracy
172
- entity_redactor = EntityRedactor(
173
- custom_components=None,
176
+ # For highest accuracy (transformer model)
177
+ inconnu_trf = Inconnu(
174
178
  language="en",
175
- model_name=SpacyModels.EN_CORE_WEB_TRF # High accuracy transformer model
179
+ model_name=SpacyModels.EN_CORE_WEB_TRF
176
180
  )
177
181
  ```
178
182
 
179
183
  **Model Selection Guide:**
180
- - `en_core_web_sm`: Fast processing, good for high-volume processing
181
- - `en_core_web_lg`: Better accuracy, moderate processing time
182
- - `en_core_web_trf`: Highest accuracy, slower processing (recommended for sensitive data)
184
+ - `en_core_web_sm`: Fast processing, good for high-volume
185
+ - `en_core_web_lg`: Better accuracy, moderate speed
186
+ - `en_core_web_trf`: Highest accuracy, GPU-optimized (recommended for sensitive data)
183
187
 
184
- For more models, visit the [spaCy Models Directory](https://spacy.io/models).
188
+ For a complete list of supported models, run `inconnu-download --list`
185
189
 
186
190
  ## Development Setup
187
191
 
@@ -0,0 +1,13 @@
1
+ inconnu/__init__.py,sha256=FZgRj2vfQsd8H57_h5qoRIkOcEJbb8lgoqQrDW38LXc,7567
2
+ inconnu/config.py,sha256=SFZjg0IpzOfac8RNmCnq9sjxqHmbhAkA1LfGHqfYiP8,129
3
+ inconnu/exceptions.py,sha256=9qEqqwiRLvy5gDEPTiiTyyr_U5SQdzivBFPFx7HErG4,1547
4
+ inconnu/model_installer.py,sha256=U8J9Qay6ioFCkcuC98XbcH8pS2w6i449MT4YfdsufHI,9651
5
+ inconnu/nlp/entity_redactor.py,sha256=S6f1VCc7LcGlHEZ8bb24P8V3eWGJIz5CtYRUteLl9cw,8243
6
+ inconnu/nlp/interfaces.py,sha256=B9FhChpPBg7nmFOJltWga5nWzMsnP9yj7SxfnBjJydg,495
7
+ inconnu/nlp/patterns.py,sha256=VxwgetKRd22esnjeya86j4oNKGzcHXIiQ6VE1LAVNzE,5662
8
+ inconnu/nlp/utils.py,sha256=700Tz-wR4JFYvnvuAvyu2x2YNwkOPtvQx007H-wS-7Y,2775
9
+ inconnu-0.1.1.dist-info/METADATA,sha256=x7p2rhMnLWJhfzyh575m77fFWv4Hj3eRRaLxKPcXRws,17231
10
+ inconnu-0.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
11
+ inconnu-0.1.1.dist-info/entry_points.txt,sha256=jBJr5LeX-XGEBh5iMQIJr5zdzqbyOUyw3rSgWZfQcDk,66
12
+ inconnu-0.1.1.dist-info/licenses/LICENSE,sha256=LMGDpdSqFgydJ63Q0EjrcYxFvATmqE_bdNHrdsAEqNE,1089
13
+ inconnu-0.1.1.dist-info/RECORD,,
@@ -1,13 +0,0 @@
1
- inconnu/__init__.py,sha256=FHDRvMfesj7UYM1JSLwzWcDQs7eqp-zFoljNCU--YZk,7567
2
- inconnu/config.py,sha256=SFZjg0IpzOfac8RNmCnq9sjxqHmbhAkA1LfGHqfYiP8,129
3
- inconnu/exceptions.py,sha256=9qEqqwiRLvy5gDEPTiiTyyr_U5SQdzivBFPFx7HErG4,1547
4
- inconnu/model_installer.py,sha256=_PphTFdkJXsz0vwqrY0W9RTbxPaYYJylgBT1H9w7AHk,6433
5
- inconnu/nlp/entity_redactor.py,sha256=TD1G8qDX4bI9bAi5zR5oR1IbJJSst80dF2wXBCloj1Y,8003
6
- inconnu/nlp/interfaces.py,sha256=B9FhChpPBg7nmFOJltWga5nWzMsnP9yj7SxfnBjJydg,495
7
- inconnu/nlp/patterns.py,sha256=VxwgetKRd22esnjeya86j4oNKGzcHXIiQ6VE1LAVNzE,5662
8
- inconnu/nlp/utils.py,sha256=700Tz-wR4JFYvnvuAvyu2x2YNwkOPtvQx007H-wS-7Y,2775
9
- inconnu-0.1.0.dist-info/METADATA,sha256=CHGP-uLQ2xf5HOOT_aGO1ePE_qXkEG3lV8LrQZ-ctWM,16533
10
- inconnu-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
11
- inconnu-0.1.0.dist-info/entry_points.txt,sha256=jBJr5LeX-XGEBh5iMQIJr5zdzqbyOUyw3rSgWZfQcDk,66
12
- inconnu-0.1.0.dist-info/licenses/LICENSE,sha256=LMGDpdSqFgydJ63Q0EjrcYxFvATmqE_bdNHrdsAEqNE,1089
13
- inconnu-0.1.0.dist-info/RECORD,,