PyPI - dstklib - Versions diffs - 2.0.0__tar.gz → 2.0.2__tar.gz - Mend

dstklib 2.0.0.tar.gz → 2.0.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

{dstklib-2.0.0 → dstklib-2.0.2}/PKG-INFO RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.1
 Name: dstklib
-Version: 2.0.0
-Requires-Python: <3.12
+Version: 2.0.2
+Requires-Python: <3.13
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: spacy
@@ -28,7 +28,7 @@ This library is based on the book *Distributional Semantics* by Alessandro Lenci
 ## Documentation
-You can find the (temporal) basic documentation [here](https://distributional-semantics-toolkit-30269b.gitlab.io/index.html).
+You can find the (temporary) basic documentation [here](https://distributional-semantics-toolkit-30269b.gitlab.io/index.html). You can also find a demonstration of how to use it on Google Colab (in Spanish) [here](https://colab.research.google.com/drive/1xN6AzovxQICDpdyFatFeJDIuZcNdCxC3?usp=sharing).
 ## Installation
@@ -209,6 +209,7 @@ Some common tasks in distributional semantics require the use of more than one m
 ```python
 from dstk.workflows.workflow_tools import WorkflowBuilder
+from dstk.workflows.stage_workflows import TextProcessing
 text = "The quick brown fox jumps over the lazy dog while the sun sets behind the hills."
 model = "my_spacy_model"
@@ -219,7 +220,7 @@ TextProcessorWorkflow = TextProcessing(
         "tokenizer": [
             {"apply_model": {"model": model}},
             {"get_tokens": {}},
-            {"remove_stop_words": {"custom_stop_words": stops}},
+            {"remove_stop_words": {"custom_stop_words": {}}},
         ],
         "ngrams": [
             {"extract_ngrams": {"window_size": 3}}
@@ -233,8 +234,9 @@ TextProcessorWorkflow = TextProcessing(
     }
 )
-tokens = TokenizerWorkflow(input_data=text)
+tokens = TextProcessorWorkflow(input_data=text)
+print(tokens)
 # Output: ['the quick brown', 'quick brown fox', 'brown fox jumps', 'fox jumps over', 'jumps over the', 'over the lazy', 'the lazy dog', 'lazy dog while', 'dog while the', 'while the sun', 'the sun set', 'sun set behind', 'set behind the', 'behind the hills']
 ```

{dstklib-2.0.0 → dstklib-2.0.2}/README.md RENAMED Viewed

@@ -12,7 +12,7 @@ This library is based on the book *Distributional Semantics* by Alessandro Lenci
 ## Documentation
-You can find the (temporal) basic documentation [here](https://distributional-semantics-toolkit-30269b.gitlab.io/index.html).
+You can find the (temporary) basic documentation [here](https://distributional-semantics-toolkit-30269b.gitlab.io/index.html). You can also find a demonstration of how to use it on Google Colab (in Spanish) [here](https://colab.research.google.com/drive/1xN6AzovxQICDpdyFatFeJDIuZcNdCxC3?usp=sharing).
 ## Installation
@@ -193,6 +193,7 @@ Some common tasks in distributional semantics require the use of more than one m
 ```python
 from dstk.workflows.workflow_tools import WorkflowBuilder
+from dstk.workflows.stage_workflows import TextProcessing
 text = "The quick brown fox jumps over the lazy dog while the sun sets behind the hills."
 model = "my_spacy_model"
@@ -203,7 +204,7 @@ TextProcessorWorkflow = TextProcessing(
         "tokenizer": [
             {"apply_model": {"model": model}},
             {"get_tokens": {}},
-            {"remove_stop_words": {"custom_stop_words": stops}},
+            {"remove_stop_words": {"custom_stop_words": {}}},
         ],
         "ngrams": [
             {"extract_ngrams": {"window_size": 3}}
@@ -217,8 +218,9 @@ TextProcessorWorkflow = TextProcessing(
     }
 )
-tokens = TokenizerWorkflow(input_data=text)
+tokens = TextProcessorWorkflow(input_data=text)
+print(tokens)
 # Output: ['the quick brown', 'quick brown fox', 'brown fox jumps', 'fox jumps over', 'jumps over the', 'over the lazy', 'the lazy dog', 'lazy dog while', 'dog while the', 'while the sun', 'the sun set', 'sun set behind', 'set behind the', 'behind the hills']
 ```

{dstklib-2.0.0 → dstklib-2.0.2}/dstk/models/models.py RENAMED Viewed

@@ -83,7 +83,6 @@ def StandardModel(text: str, model: str | Language, custom_stop_words: list[str]
         module_name="count_models",
         template=CountModelsTemplate,
         workflow=[
-            {"scale_matrix": {}},
             {"svd_embeddings": {"n_components": n_components}}
         ]
     )

{dstklib-2.0.0 → dstklib-2.0.2}/dstk/templates/templates.py RENAMED Viewed

@@ -129,12 +129,6 @@ WeightMatrixTemplate: WorkflowTemplate = {
 CountModelsTemplate: WorkflowTemplate = {
     "steps": {
         0: {
-            "include": ["scale_matrix"],
-            "repeat": False,
-            "chaining": False,
-            "step_name": "scale_matrix"
-        },
-        1: {
             "include": "*",
             "repeat": False,
             "chaining": False,

{dstklib-2.0.0 → dstklib-2.0.2}/dstklib.egg-info/PKG-INFO RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.1
 Name: dstklib
-Version: 2.0.0
-Requires-Python: <3.12
+Version: 2.0.2
+Requires-Python: <3.13
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: spacy
@@ -28,7 +28,7 @@ This library is based on the book *Distributional Semantics* by Alessandro Lenci
 ## Documentation
-You can find the (temporal) basic documentation [here](https://distributional-semantics-toolkit-30269b.gitlab.io/index.html).
+You can find the (temporary) basic documentation [here](https://distributional-semantics-toolkit-30269b.gitlab.io/index.html). You can also find a demonstration of how to use it on Google Colab (in Spanish) [here](https://colab.research.google.com/drive/1xN6AzovxQICDpdyFatFeJDIuZcNdCxC3?usp=sharing).
 ## Installation
@@ -209,6 +209,7 @@ Some common tasks in distributional semantics require the use of more than one m
 ```python
 from dstk.workflows.workflow_tools import WorkflowBuilder
+from dstk.workflows.stage_workflows import TextProcessing
 text = "The quick brown fox jumps over the lazy dog while the sun sets behind the hills."
 model = "my_spacy_model"
@@ -219,7 +220,7 @@ TextProcessorWorkflow = TextProcessing(
         "tokenizer": [
             {"apply_model": {"model": model}},
             {"get_tokens": {}},
-            {"remove_stop_words": {"custom_stop_words": stops}},
+            {"remove_stop_words": {"custom_stop_words": {}}},
         ],
         "ngrams": [
             {"extract_ngrams": {"window_size": 3}}
@@ -233,8 +234,9 @@ TextProcessorWorkflow = TextProcessing(
     }
 )
-tokens = TokenizerWorkflow(input_data=text)
+tokens = TextProcessorWorkflow(input_data=text)
+print(tokens)
 # Output: ['the quick brown', 'quick brown fox', 'brown fox jumps', 'fox jumps over', 'jumps over the', 'over the lazy', 'the lazy dog', 'lazy dog while', 'dog while the', 'while the sun', 'the sun set', 'sun set behind', 'set behind the', 'behind the hills']
 ```

{dstklib-2.0.0 → dstklib-2.0.2}/setup.py RENAMED Viewed

@@ -5,7 +5,7 @@ with open("README.md", "r") as file:
 setup(
     name="dstklib",
-    version="2.0.0",
+    version="2.0.2",
     packages=find_packages(),
     install_requires=[
         "spacy",
@@ -18,7 +18,7 @@ setup(
         "kneed",
         "umap-learn"
     ],
-    python_requires="<3.12",
+    python_requires="<3.13",
     long_description=description,
     long_description_content_type="text/markdown"
 )