sinapsis-bertopic 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sinapsis_bertopic-0.1.0/PKG-INFO +310 -0
- sinapsis_bertopic-0.1.0/README.md +289 -0
- sinapsis_bertopic-0.1.0/pyproject.toml +67 -0
- sinapsis_bertopic-0.1.0/setup.cfg +4 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic/__init__.py +0 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic/helpers/__init__.py +0 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic/helpers/bertopic_helpers.py +116 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic/templates/__init__.py +23 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic/templates/base_models/__init__.py +0 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic/templates/base_models/base_attrs.py +146 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic/templates/bertopic_base.py +99 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic/templates/bertopic_fit_model.py +158 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic/templates/bertopic_predict.py +193 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic/templates/bertopic_visualize_documents.py +193 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic/templates/bertopic_visualize_topics.py +154 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic/third_party/_documents.py +310 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic.egg-info/PKG-INFO +310 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic.egg-info/SOURCES.txt +19 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic.egg-info/dependency_links.txt +1 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic.egg-info/requires.txt +13 -0
- sinapsis_bertopic-0.1.0/src/sinapsis_bertopic.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sinapsis-bertopic
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: BERTopic model integration for the Sinapsis framework
|
|
5
|
+
Project-URL: Homepage, https://sinapsis.tech
|
|
6
|
+
Project-URL: Documentation, https://docs.sinapsis.tech/docs
|
|
7
|
+
Project-URL: Tutorials, https://docs.sinapsis.tech/tutorials
|
|
8
|
+
Project-URL: Repository, https://github.com/Sinapsis-AI/sinapsis-bertopic.git
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: bertopic>=0.17.4
|
|
12
|
+
Requires-Dist: kaleido>=1.2.0
|
|
13
|
+
Requires-Dist: pillow>=12.1.1
|
|
14
|
+
Requires-Dist: sinapsis>=0.2.25
|
|
15
|
+
Provides-Extra: wikipedia-reader
|
|
16
|
+
Requires-Dist: sinapsis-langchain-readers[langchain-wikipedia-readers]>=0.1.8; extra == "wikipedia-reader"
|
|
17
|
+
Provides-Extra: sinapsis-data-writers
|
|
18
|
+
Requires-Dist: sinapsis-data-writers[opencv]>=0.1.16; extra == "sinapsis-data-writers"
|
|
19
|
+
Provides-Extra: all
|
|
20
|
+
Requires-Dist: sinapsis-bertopic[sinapsis-data-writers,wikipedia-reader]; extra == "all"
|
|
21
|
+
|
|
22
|
+
<h1 align="center">
|
|
23
|
+
<br>
|
|
24
|
+
<a href="https://sinapsis.tech/">
|
|
25
|
+
<img
|
|
26
|
+
src="https://github.com/Sinapsis-AI/brand-resources/blob/main/sinapsis_logo/4x/logo.png?raw=true"
|
|
27
|
+
alt="" width="300">
|
|
28
|
+
</a><br>
|
|
29
|
+
Sinapsis BERTopic
|
|
30
|
+
<br>
|
|
31
|
+
</h1>
|
|
32
|
+
|
|
33
|
+
<h4 align="center">Package for BERTopic </h4>
|
|
34
|
+
|
|
35
|
+
<p align="center">
|
|
36
|
+
<a href="#installation">🐍 Installation</a> •
|
|
37
|
+
<a href="#features"> 🚀 Features</a> •
|
|
38
|
+
<a href="#documentation">📙 Documentation</a> •
|
|
39
|
+
<a href="#license"> 🔍 License </a>
|
|
40
|
+
</p>
|
|
41
|
+
|
|
42
|
+
**Sinapsis BERTopic** provides BERTopic model integration for the Sinapsis framework for topic clustering.
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
<h2 id="installation"> 🐍 Installation </h2>
|
|
46
|
+
|
|
47
|
+
Install using your package manager of choice. We encourage the use of <code>uv</code>
|
|
48
|
+
|
|
49
|
+
This project is private. Make sure you have authorized credentials before proceeding.
|
|
50
|
+
|
|
51
|
+
**Recommended Method (using `.netrc`):**
|
|
52
|
+
|
|
53
|
+
To avoid baking credentials into URLs, configure your `~/.netrc` file with your credentials:
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
Example with <code>uv</code>:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
uv pip install sinapsis-bertopic --extra-index-url https://pypi.sinapsis.tech
|
|
61
|
+
```
|
|
62
|
+
or with raw <code>pip</code>:
|
|
63
|
+
```bash
|
|
64
|
+
pip install sinapsis-bertopic --extra-index-url https://pypi.sinapsis.tech
|
|
65
|
+
```
|
|
66
|
+
<h2 id="features">🚀 Features</h2>
|
|
67
|
+
|
|
68
|
+
<h3>Templates Supported</h3>
|
|
69
|
+
|
|
70
|
+
This package includes the following templates:
|
|
71
|
+
- **BERTopicFitModel**: A template class for fitting BERTopic models and saving them to disk.
|
|
72
|
+
- **BERTopicPredict**: Template for topic prediction using BERTopic models.
|
|
73
|
+
- **BERTopicVisualizeDocuments**: BERTopic-based document visualization template for generating and exporting interactive topic model
|
|
74
|
+
visualizations.
|
|
75
|
+
This template extends BERTopicBase to provide functionality for encoding documents using sentence transformers,
|
|
76
|
+
fitting a BERTopic model, and producing interactive visualizations of documents in a reduced dimensional space.
|
|
77
|
+
The visualizations can be saved as HTML files and optionally exported as image arrays.
|
|
78
|
+
- **BERTopicVisualizeTopics**: Template for BERTopic topic visualization.
|
|
79
|
+
|
|
80
|
+
This template extends BERTopicPredict to generate and save interactive visualizations
|
|
81
|
+
of topics discovered by a BERTopic model. It produces plotly-based visual representations
|
|
82
|
+
of topic relationships and characteristics, and persists them as HTML files.
|
|
83
|
+
|
|
84
|
+
> [!TIP]
|
|
85
|
+
> Use CLI command ``` sinapsis info --all-template-names``` to show a list with all the available Template names installed with Sinapsis BERTopic.
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
> [!TIP]
|
|
89
|
+
> Use CLI command ```sinapsis info --example-template-config TEMPLATE_NAME``` to produce an example Agent config for the Template specified in ***TEMPLATE_NAME***.
|
|
90
|
+
|
|
91
|
+
For example, for ***BERTopicFitModel*** use ```sinapsis info --example-template-config BERTopicFitModel``` to produce an example config like:
|
|
92
|
+
|
|
93
|
+
```yaml
|
|
94
|
+
agent:
|
|
95
|
+
name: my_test_agent
|
|
96
|
+
templates:
|
|
97
|
+
- template_name: InputTemplate
|
|
98
|
+
class_name: InputTemplate
|
|
99
|
+
attributes: {}
|
|
100
|
+
- template_name: BERTopicFitModel
|
|
101
|
+
class_name: BERTopicFitModel
|
|
102
|
+
template_input: InputTemplate
|
|
103
|
+
attributes:
|
|
104
|
+
bertopic_model_params:
|
|
105
|
+
language: english
|
|
106
|
+
top_n_words: 10
|
|
107
|
+
n_gram_range: !!python/tuple
|
|
108
|
+
- 1
|
|
109
|
+
- 1
|
|
110
|
+
min_topic_size: 10
|
|
111
|
+
nr_topics: null
|
|
112
|
+
low_memory: false
|
|
113
|
+
calculate_probabilities: false
|
|
114
|
+
seed_topic_list: null
|
|
115
|
+
zeroshot_topic_list: null
|
|
116
|
+
zeroshot_min_similarity: 0.7
|
|
117
|
+
umap_model_params:
|
|
118
|
+
n_neighbors: 15
|
|
119
|
+
n_components: 2
|
|
120
|
+
metric: euclidean
|
|
121
|
+
metric_kwds: null
|
|
122
|
+
output_metric: euclidean
|
|
123
|
+
output_metric_kwds: null
|
|
124
|
+
n_epochs: null
|
|
125
|
+
learning_rate: 1.0
|
|
126
|
+
init: spectral
|
|
127
|
+
min_dist: 0.1
|
|
128
|
+
spread: 1.0
|
|
129
|
+
low_memory: true
|
|
130
|
+
n_jobs: -1
|
|
131
|
+
set_op_mix_ratio: 1.0
|
|
132
|
+
local_connectivity: 1.0
|
|
133
|
+
repulsion_strength: 1.0
|
|
134
|
+
negative_sample_rate: 5
|
|
135
|
+
transform_queue_size: 4.0
|
|
136
|
+
a: null
|
|
137
|
+
b: null
|
|
138
|
+
random_state: null
|
|
139
|
+
angular_rp_forest: false
|
|
140
|
+
target_n_neighbors: -1
|
|
141
|
+
target_metric: categorical
|
|
142
|
+
target_metric_kwds: null
|
|
143
|
+
target_weight: 0.5
|
|
144
|
+
transform_seed: 42
|
|
145
|
+
transform_mode: embedding
|
|
146
|
+
force_approximation_algorithm: false
|
|
147
|
+
verbose: false
|
|
148
|
+
tqdm_kwds: null
|
|
149
|
+
unique: false
|
|
150
|
+
densmap: false
|
|
151
|
+
dens_lambda: 2.0
|
|
152
|
+
dens_frac: 0.3
|
|
153
|
+
dens_var_shift: 0.1
|
|
154
|
+
output_dens: false
|
|
155
|
+
disconnection_distance: null
|
|
156
|
+
precomputed_knn: !!python/tuple
|
|
157
|
+
- null
|
|
158
|
+
- null
|
|
159
|
+
- null
|
|
160
|
+
hdbscan_model_params:
|
|
161
|
+
min_cluster_size: 5
|
|
162
|
+
min_samples: null
|
|
163
|
+
cluster_selection_epsilon: 0.0
|
|
164
|
+
cluster_selection_persistence: 0.0
|
|
165
|
+
max_cluster_size: 0
|
|
166
|
+
metric: euclidean
|
|
167
|
+
alpha: 1.0
|
|
168
|
+
p: null
|
|
169
|
+
algorithm: best
|
|
170
|
+
leaf_size: 40
|
|
171
|
+
approx_min_span_tree: true
|
|
172
|
+
gen_min_span_tree: false
|
|
173
|
+
core_dist_n_jobs: 4
|
|
174
|
+
cluster_selection_method: eom
|
|
175
|
+
allow_single_cluster: false
|
|
176
|
+
prediction_data: false
|
|
177
|
+
branch_detection_data: false
|
|
178
|
+
match_reference_implementation: false
|
|
179
|
+
cluster_selection_epsilon_max: '`replace_me:<class ''float''>`'
|
|
180
|
+
kwargs: '`replace_me:dict[str, typing.Any]`'
|
|
181
|
+
bertopic_save_model_params:
|
|
182
|
+
serialization: safetensors
|
|
183
|
+
save_ctfidf: true
|
|
184
|
+
save_embedding_model: sentence-transformers/all-MiniLM-L6-v2
|
|
185
|
+
root_dir: /root/.cache/sinapsis
|
|
186
|
+
save_path: '`replace_me:<class ''str''>`'
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
<h2 id='example'>📚 Usage example</h2>
|
|
194
|
+
|
|
195
|
+
Below is an example YAML configuration for fitting a BERTopic model
|
|
196
|
+
|
|
197
|
+
<details>
|
|
198
|
+
<summary ><strong><span style="font-size: 1.4em;">Config</span></strong></summary>
|
|
199
|
+
|
|
200
|
+
```yaml
|
|
201
|
+
agent:
|
|
202
|
+
name: my_test_agent
|
|
203
|
+
templates:
|
|
204
|
+
- template_name: InputTemplate
|
|
205
|
+
class_name: InputTemplate
|
|
206
|
+
attributes: {}
|
|
207
|
+
- template_name: BERTopicFitModel
|
|
208
|
+
class_name: BERTopicFitModel
|
|
209
|
+
template_input: InputTemplate
|
|
210
|
+
attributes:
|
|
211
|
+
bertopic_model_params:
|
|
212
|
+
language: english
|
|
213
|
+
top_n_words: 10
|
|
214
|
+
n_gram_range: !!python/tuple
|
|
215
|
+
- 1
|
|
216
|
+
- 1
|
|
217
|
+
min_topic_size: 10
|
|
218
|
+
nr_topics: null
|
|
219
|
+
low_memory: false
|
|
220
|
+
calculate_probabilities: false
|
|
221
|
+
seed_topic_list: null
|
|
222
|
+
zeroshot_topic_list: null
|
|
223
|
+
zeroshot_min_similarity: 0.7
|
|
224
|
+
umap_model_params:
|
|
225
|
+
n_neighbors: 15
|
|
226
|
+
n_components: 2
|
|
227
|
+
metric: euclidean
|
|
228
|
+
metric_kwds: null
|
|
229
|
+
output_metric: euclidean
|
|
230
|
+
output_metric_kwds: null
|
|
231
|
+
n_epochs: null
|
|
232
|
+
learning_rate: 1.0
|
|
233
|
+
init: spectral
|
|
234
|
+
min_dist: 0.1
|
|
235
|
+
spread: 1.0
|
|
236
|
+
low_memory: true
|
|
237
|
+
n_jobs: -1
|
|
238
|
+
set_op_mix_ratio: 1.0
|
|
239
|
+
local_connectivity: 1.0
|
|
240
|
+
repulsion_strength: 1.0
|
|
241
|
+
negative_sample_rate: 5
|
|
242
|
+
transform_queue_size: 4.0
|
|
243
|
+
a: null
|
|
244
|
+
b: null
|
|
245
|
+
random_state: null
|
|
246
|
+
angular_rp_forest: false
|
|
247
|
+
target_n_neighbors: -1
|
|
248
|
+
target_metric: categorical
|
|
249
|
+
target_metric_kwds: null
|
|
250
|
+
target_weight: 0.5
|
|
251
|
+
transform_seed: 42
|
|
252
|
+
transform_mode: embedding
|
|
253
|
+
force_approximation_algorithm: false
|
|
254
|
+
verbose: false
|
|
255
|
+
tqdm_kwds: null
|
|
256
|
+
unique: false
|
|
257
|
+
densmap: false
|
|
258
|
+
dens_lambda: 2.0
|
|
259
|
+
dens_frac: 0.3
|
|
260
|
+
dens_var_shift: 0.1
|
|
261
|
+
output_dens: false
|
|
262
|
+
disconnection_distance: null
|
|
263
|
+
precomputed_knn: !!python/tuple
|
|
264
|
+
- null
|
|
265
|
+
- null
|
|
266
|
+
- null
|
|
267
|
+
hdbscan_model_params:
|
|
268
|
+
min_cluster_size: 5
|
|
269
|
+
min_samples: null
|
|
270
|
+
cluster_selection_epsilon: 0.0
|
|
271
|
+
cluster_selection_persistence: 0.0
|
|
272
|
+
max_cluster_size: 0
|
|
273
|
+
metric: euclidean
|
|
274
|
+
alpha: 1.0
|
|
275
|
+
p: null
|
|
276
|
+
algorithm: best
|
|
277
|
+
leaf_size: 40
|
|
278
|
+
approx_min_span_tree: true
|
|
279
|
+
gen_min_span_tree: false
|
|
280
|
+
core_dist_n_jobs: 4
|
|
281
|
+
cluster_selection_method: eom
|
|
282
|
+
allow_single_cluster: false
|
|
283
|
+
prediction_data: false
|
|
284
|
+
branch_detection_data: false
|
|
285
|
+
match_reference_implementation: false
|
|
286
|
+
cluster_selection_epsilon_max: '`replace_me:<class ''float''>`'
|
|
287
|
+
kwargs: '`replace_me:dict[str, typing.Any]`'
|
|
288
|
+
bertopic_save_model_params:
|
|
289
|
+
serialization: safetensors
|
|
290
|
+
save_ctfidf: true
|
|
291
|
+
save_embedding_model: sentence-transformers/all-MiniLM-L6-v2
|
|
292
|
+
root_dir: /root/.cache/sinapsis
|
|
293
|
+
save_path: '`replace_me:<class ''str''>`'
|
|
294
|
+
|
|
295
|
+
```
|
|
296
|
+
</details>
|
|
297
|
+
This configuration defines an **agent** and a sequence of **templates** to fit a BERTopic model on incoming data.
|
|
298
|
+
|
|
299
|
+
To run the config, use the CLI:
|
|
300
|
+
```bash
|
|
301
|
+
sinapsis run name_of_config.yml
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
<h2 id="documentation">📙 Documentation</h2>
|
|
306
|
+
|
|
307
|
+
Documentation for this and other sinapsis packages is available on the [sinapsis website](https://docs.sinapsis.tech/docs)
|
|
308
|
+
|
|
309
|
+
Tutorials for different projects within sinapsis are available at [sinapsis tutorials page](https://docs.sinapsis.tech/tutorials)
|
|
310
|
+
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
<h1 align="center">
|
|
2
|
+
<br>
|
|
3
|
+
<a href="https://sinapsis.tech/">
|
|
4
|
+
<img
|
|
5
|
+
src="https://github.com/Sinapsis-AI/brand-resources/blob/main/sinapsis_logo/4x/logo.png?raw=true"
|
|
6
|
+
alt="" width="300">
|
|
7
|
+
</a><br>
|
|
8
|
+
Sinapsis BERTopic
|
|
9
|
+
<br>
|
|
10
|
+
</h1>
|
|
11
|
+
|
|
12
|
+
<h4 align="center">Package for BERTopic </h4>
|
|
13
|
+
|
|
14
|
+
<p align="center">
|
|
15
|
+
<a href="#installation">🐍 Installation</a> •
|
|
16
|
+
<a href="#features"> 🚀 Features</a> •
|
|
17
|
+
<a href="#documentation">📙 Documentation</a> •
|
|
18
|
+
<a href="#license"> 🔍 License </a>
|
|
19
|
+
</p>
|
|
20
|
+
|
|
21
|
+
**Sinapsis BERTopic** provides BERTopic model integration for the Sinapsis framework for topic clustering.
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
<h2 id="installation"> 🐍 Installation </h2>
|
|
25
|
+
|
|
26
|
+
Install using your package manager of choice. We encourage the use of <code>uv</code>
|
|
27
|
+
|
|
28
|
+
This project is private. Make sure you have authorized credentials before proceeding.
|
|
29
|
+
|
|
30
|
+
**Recommended Method (using `.netrc`):**
|
|
31
|
+
|
|
32
|
+
To avoid baking credentials into URLs, configure your `~/.netrc` file with your credentials:
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
Example with <code>uv</code>:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
uv pip install sinapsis-bertopic --extra-index-url https://pypi.sinapsis.tech
|
|
40
|
+
```
|
|
41
|
+
or with raw <code>pip</code>:
|
|
42
|
+
```bash
|
|
43
|
+
pip install sinapsis-bertopic --extra-index-url https://pypi.sinapsis.tech
|
|
44
|
+
```
|
|
45
|
+
<h2 id="features">🚀 Features</h2>
|
|
46
|
+
|
|
47
|
+
<h3>Templates Supported</h3>
|
|
48
|
+
|
|
49
|
+
This package includes the following templates:
|
|
50
|
+
- **BERTopicFitModel**: A template class for fitting BERTopic models and saving them to disk.
|
|
51
|
+
- **BERTopicPredict**: Template for topic prediction using BERTopic models.
|
|
52
|
+
- **BERTopicVisualizeDocuments**: BERTopic-based document visualization template for generating and exporting interactive topic model
|
|
53
|
+
visualizations.
|
|
54
|
+
This template extends BERTopicBase to provide functionality for encoding documents using sentence transformers,
|
|
55
|
+
fitting a BERTopic model, and producing interactive visualizations of documents in a reduced dimensional space.
|
|
56
|
+
The visualizations can be saved as HTML files and optionally exported as image arrays.
|
|
57
|
+
- **BERTopicVisualizeTopics**: Template for BERTopic topic visualization.
|
|
58
|
+
|
|
59
|
+
This template extends BERTopicPredict to generate and save interactive visualizations
|
|
60
|
+
of topics discovered by a BERTopic model. It produces plotly-based visual representations
|
|
61
|
+
of topic relationships and characteristics, and persists them as HTML files.
|
|
62
|
+
|
|
63
|
+
> [!TIP]
|
|
64
|
+
> Use CLI command ``` sinapsis info --all-template-names``` to show a list with all the available Template names installed with Sinapsis BERTopic.
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
> [!TIP]
|
|
68
|
+
> Use CLI command ```sinapsis info --example-template-config TEMPLATE_NAME``` to produce an example Agent config for the Template specified in ***TEMPLATE_NAME***.
|
|
69
|
+
|
|
70
|
+
For example, for ***BERTopicFitModel*** use ```sinapsis info --example-template-config BERTopicFitModel``` to produce an example config like:
|
|
71
|
+
|
|
72
|
+
```yaml
|
|
73
|
+
agent:
|
|
74
|
+
name: my_test_agent
|
|
75
|
+
templates:
|
|
76
|
+
- template_name: InputTemplate
|
|
77
|
+
class_name: InputTemplate
|
|
78
|
+
attributes: {}
|
|
79
|
+
- template_name: BERTopicFitModel
|
|
80
|
+
class_name: BERTopicFitModel
|
|
81
|
+
template_input: InputTemplate
|
|
82
|
+
attributes:
|
|
83
|
+
bertopic_model_params:
|
|
84
|
+
language: english
|
|
85
|
+
top_n_words: 10
|
|
86
|
+
n_gram_range: !!python/tuple
|
|
87
|
+
- 1
|
|
88
|
+
- 1
|
|
89
|
+
min_topic_size: 10
|
|
90
|
+
nr_topics: null
|
|
91
|
+
low_memory: false
|
|
92
|
+
calculate_probabilities: false
|
|
93
|
+
seed_topic_list: null
|
|
94
|
+
zeroshot_topic_list: null
|
|
95
|
+
zeroshot_min_similarity: 0.7
|
|
96
|
+
umap_model_params:
|
|
97
|
+
n_neighbors: 15
|
|
98
|
+
n_components: 2
|
|
99
|
+
metric: euclidean
|
|
100
|
+
metric_kwds: null
|
|
101
|
+
output_metric: euclidean
|
|
102
|
+
output_metric_kwds: null
|
|
103
|
+
n_epochs: null
|
|
104
|
+
learning_rate: 1.0
|
|
105
|
+
init: spectral
|
|
106
|
+
min_dist: 0.1
|
|
107
|
+
spread: 1.0
|
|
108
|
+
low_memory: true
|
|
109
|
+
n_jobs: -1
|
|
110
|
+
set_op_mix_ratio: 1.0
|
|
111
|
+
local_connectivity: 1.0
|
|
112
|
+
repulsion_strength: 1.0
|
|
113
|
+
negative_sample_rate: 5
|
|
114
|
+
transform_queue_size: 4.0
|
|
115
|
+
a: null
|
|
116
|
+
b: null
|
|
117
|
+
random_state: null
|
|
118
|
+
angular_rp_forest: false
|
|
119
|
+
target_n_neighbors: -1
|
|
120
|
+
target_metric: categorical
|
|
121
|
+
target_metric_kwds: null
|
|
122
|
+
target_weight: 0.5
|
|
123
|
+
transform_seed: 42
|
|
124
|
+
transform_mode: embedding
|
|
125
|
+
force_approximation_algorithm: false
|
|
126
|
+
verbose: false
|
|
127
|
+
tqdm_kwds: null
|
|
128
|
+
unique: false
|
|
129
|
+
densmap: false
|
|
130
|
+
dens_lambda: 2.0
|
|
131
|
+
dens_frac: 0.3
|
|
132
|
+
dens_var_shift: 0.1
|
|
133
|
+
output_dens: false
|
|
134
|
+
disconnection_distance: null
|
|
135
|
+
precomputed_knn: !!python/tuple
|
|
136
|
+
- null
|
|
137
|
+
- null
|
|
138
|
+
- null
|
|
139
|
+
hdbscan_model_params:
|
|
140
|
+
min_cluster_size: 5
|
|
141
|
+
min_samples: null
|
|
142
|
+
cluster_selection_epsilon: 0.0
|
|
143
|
+
cluster_selection_persistence: 0.0
|
|
144
|
+
max_cluster_size: 0
|
|
145
|
+
metric: euclidean
|
|
146
|
+
alpha: 1.0
|
|
147
|
+
p: null
|
|
148
|
+
algorithm: best
|
|
149
|
+
leaf_size: 40
|
|
150
|
+
approx_min_span_tree: true
|
|
151
|
+
gen_min_span_tree: false
|
|
152
|
+
core_dist_n_jobs: 4
|
|
153
|
+
cluster_selection_method: eom
|
|
154
|
+
allow_single_cluster: false
|
|
155
|
+
prediction_data: false
|
|
156
|
+
branch_detection_data: false
|
|
157
|
+
match_reference_implementation: false
|
|
158
|
+
cluster_selection_epsilon_max: '`replace_me:<class ''float''>`'
|
|
159
|
+
kwargs: '`replace_me:dict[str, typing.Any]`'
|
|
160
|
+
bertopic_save_model_params:
|
|
161
|
+
serialization: safetensors
|
|
162
|
+
save_ctfidf: true
|
|
163
|
+
save_embedding_model: sentence-transformers/all-MiniLM-L6-v2
|
|
164
|
+
root_dir: /root/.cache/sinapsis
|
|
165
|
+
save_path: '`replace_me:<class ''str''>`'
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
<h2 id='example'>📚 Usage example</h2>
|
|
173
|
+
|
|
174
|
+
Below is an example YAML configuration for fitting a BERTopic model
|
|
175
|
+
|
|
176
|
+
<details>
|
|
177
|
+
<summary ><strong><span style="font-size: 1.4em;">Config</span></strong></summary>
|
|
178
|
+
|
|
179
|
+
```yaml
|
|
180
|
+
agent:
|
|
181
|
+
name: my_test_agent
|
|
182
|
+
templates:
|
|
183
|
+
- template_name: InputTemplate
|
|
184
|
+
class_name: InputTemplate
|
|
185
|
+
attributes: {}
|
|
186
|
+
- template_name: BERTopicFitModel
|
|
187
|
+
class_name: BERTopicFitModel
|
|
188
|
+
template_input: InputTemplate
|
|
189
|
+
attributes:
|
|
190
|
+
bertopic_model_params:
|
|
191
|
+
language: english
|
|
192
|
+
top_n_words: 10
|
|
193
|
+
n_gram_range: !!python/tuple
|
|
194
|
+
- 1
|
|
195
|
+
- 1
|
|
196
|
+
min_topic_size: 10
|
|
197
|
+
nr_topics: null
|
|
198
|
+
low_memory: false
|
|
199
|
+
calculate_probabilities: false
|
|
200
|
+
seed_topic_list: null
|
|
201
|
+
zeroshot_topic_list: null
|
|
202
|
+
zeroshot_min_similarity: 0.7
|
|
203
|
+
umap_model_params:
|
|
204
|
+
n_neighbors: 15
|
|
205
|
+
n_components: 2
|
|
206
|
+
metric: euclidean
|
|
207
|
+
metric_kwds: null
|
|
208
|
+
output_metric: euclidean
|
|
209
|
+
output_metric_kwds: null
|
|
210
|
+
n_epochs: null
|
|
211
|
+
learning_rate: 1.0
|
|
212
|
+
init: spectral
|
|
213
|
+
min_dist: 0.1
|
|
214
|
+
spread: 1.0
|
|
215
|
+
low_memory: true
|
|
216
|
+
n_jobs: -1
|
|
217
|
+
set_op_mix_ratio: 1.0
|
|
218
|
+
local_connectivity: 1.0
|
|
219
|
+
repulsion_strength: 1.0
|
|
220
|
+
negative_sample_rate: 5
|
|
221
|
+
transform_queue_size: 4.0
|
|
222
|
+
a: null
|
|
223
|
+
b: null
|
|
224
|
+
random_state: null
|
|
225
|
+
angular_rp_forest: false
|
|
226
|
+
target_n_neighbors: -1
|
|
227
|
+
target_metric: categorical
|
|
228
|
+
target_metric_kwds: null
|
|
229
|
+
target_weight: 0.5
|
|
230
|
+
transform_seed: 42
|
|
231
|
+
transform_mode: embedding
|
|
232
|
+
force_approximation_algorithm: false
|
|
233
|
+
verbose: false
|
|
234
|
+
tqdm_kwds: null
|
|
235
|
+
unique: false
|
|
236
|
+
densmap: false
|
|
237
|
+
dens_lambda: 2.0
|
|
238
|
+
dens_frac: 0.3
|
|
239
|
+
dens_var_shift: 0.1
|
|
240
|
+
output_dens: false
|
|
241
|
+
disconnection_distance: null
|
|
242
|
+
precomputed_knn: !!python/tuple
|
|
243
|
+
- null
|
|
244
|
+
- null
|
|
245
|
+
- null
|
|
246
|
+
hdbscan_model_params:
|
|
247
|
+
min_cluster_size: 5
|
|
248
|
+
min_samples: null
|
|
249
|
+
cluster_selection_epsilon: 0.0
|
|
250
|
+
cluster_selection_persistence: 0.0
|
|
251
|
+
max_cluster_size: 0
|
|
252
|
+
metric: euclidean
|
|
253
|
+
alpha: 1.0
|
|
254
|
+
p: null
|
|
255
|
+
algorithm: best
|
|
256
|
+
leaf_size: 40
|
|
257
|
+
approx_min_span_tree: true
|
|
258
|
+
gen_min_span_tree: false
|
|
259
|
+
core_dist_n_jobs: 4
|
|
260
|
+
cluster_selection_method: eom
|
|
261
|
+
allow_single_cluster: false
|
|
262
|
+
prediction_data: false
|
|
263
|
+
branch_detection_data: false
|
|
264
|
+
match_reference_implementation: false
|
|
265
|
+
cluster_selection_epsilon_max: '`replace_me:<class ''float''>`'
|
|
266
|
+
kwargs: '`replace_me:dict[str, typing.Any]`'
|
|
267
|
+
bertopic_save_model_params:
|
|
268
|
+
serialization: safetensors
|
|
269
|
+
save_ctfidf: true
|
|
270
|
+
save_embedding_model: sentence-transformers/all-MiniLM-L6-v2
|
|
271
|
+
root_dir: /root/.cache/sinapsis
|
|
272
|
+
save_path: '`replace_me:<class ''str''>`'
|
|
273
|
+
|
|
274
|
+
```
|
|
275
|
+
</details>
|
|
276
|
+
This configuration defines an **agent** and a sequence of **templates** to fit a BERTopic model on incoming data.
|
|
277
|
+
|
|
278
|
+
To run the config, use the CLI:
|
|
279
|
+
```bash
|
|
280
|
+
sinapsis run name_of_config.yml
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
<h2 id="documentation">📙 Documentation</h2>
|
|
285
|
+
|
|
286
|
+
Documentation for this and other sinapsis packages is available on the [sinapsis website](https://docs.sinapsis.tech/docs)
|
|
287
|
+
|
|
288
|
+
Tutorials for different projects within sinapsis are available at [sinapsis tutorials page](https://docs.sinapsis.tech/tutorials)
|
|
289
|
+
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "sinapsis-bertopic"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "BERTopic model integration for the Sinapsis framework"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
dependencies = [
|
|
8
|
+
"bertopic>=0.17.4",
|
|
9
|
+
"kaleido>=1.2.0",
|
|
10
|
+
"pillow>=12.1.1",
|
|
11
|
+
"sinapsis>=0.2.25",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[project.optional-dependencies]
|
|
15
|
+
wikipedia-reader = [
|
|
16
|
+
"sinapsis-langchain-readers[langchain-wikipedia-readers]>=0.1.8",
|
|
17
|
+
]
|
|
18
|
+
sinapsis-data-writers = [
|
|
19
|
+
"sinapsis-data-writers[opencv]>=0.1.16",
|
|
20
|
+
]
|
|
21
|
+
all = [
|
|
22
|
+
"sinapsis-bertopic[sinapsis-data-writers,wikipedia-reader]",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[[tool.uv.index]]
|
|
26
|
+
name = "sinapsis-core"
|
|
27
|
+
url = "https://pypi.sinapsis.tech/"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
[tool.ruff]
|
|
31
|
+
lint.select = [
|
|
32
|
+
"ARG",
|
|
33
|
+
"ANN",
|
|
34
|
+
"BLE",
|
|
35
|
+
"C4",
|
|
36
|
+
"E",
|
|
37
|
+
"F",
|
|
38
|
+
"FIX",
|
|
39
|
+
"FLY",
|
|
40
|
+
"I",
|
|
41
|
+
"PERF",
|
|
42
|
+
"PIE",
|
|
43
|
+
"RUF",
|
|
44
|
+
"RSE",
|
|
45
|
+
"SIM",
|
|
46
|
+
"SLOT",
|
|
47
|
+
"T10",
|
|
48
|
+
"T20",
|
|
49
|
+
"TD",
|
|
50
|
+
"TID",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
lint.ignore = ['ANN401']
|
|
55
|
+
line-length = 120
|
|
56
|
+
show-fixes = true
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
[build-system]
|
|
60
|
+
requires = ["setuptools"]
|
|
61
|
+
build-backend = "setuptools.build_meta"
|
|
62
|
+
|
|
63
|
+
[project.urls]
|
|
64
|
+
Homepage = "https://sinapsis.tech"
|
|
65
|
+
Documentation = "https://docs.sinapsis.tech/docs"
|
|
66
|
+
Tutorials = "https://docs.sinapsis.tech/tutorials"
|
|
67
|
+
Repository = "https://github.com/Sinapsis-AI/sinapsis-bertopic.git"
|
|
File without changes
|
|
File without changes
|