PyPI - rapid-textrank - Versions diffs - 0.1.0__tar.gz → 0.1.1__tar.gz - Mend

rapid-textrank 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

{rapid_textrank-0.1.0 → rapid_textrank-0.1.1}/Cargo.lock RENAMED Viewed

@@ -579,7 +579,7 @@ dependencies = [
 [[package]]
 name = "rapid_textrank"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
  "approx",
  "criterion",

{rapid_textrank-0.1.0 → rapid_textrank-0.1.1}/Cargo.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [package]
 name = "rapid_textrank"
-version = "0.1.0"
+version = "0.1.1"
 edition = "2021"
 authors = ["TextRanker Contributors"]
 description = "High-performance TextRank implementation with Python bindings"

{rapid_textrank-0.1.0 → rapid_textrank-0.1.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rapid_textrank
-Version: 0.1.0
+Version: 0.1.1
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
@@ -217,12 +217,16 @@ config = TextRankConfig(
     damping=0.85,              # PageRank damping factor (0-1)
     max_iterations=100,        # Maximum PageRank iterations
     convergence_threshold=1e-6,# Convergence threshold
-    window_size=4,             # Co-occurrence window size
+    window_size=3,             # Co-occurrence window size
     top_n=10,                  # Number of results
     min_phrase_length=1,       # Minimum words in a phrase
     max_phrase_length=4,       # Maximum words in a phrase
     score_aggregation="sum",   # How to combine word scores: "sum", "mean", "max", "rms"
-    language="en"              # Language for stopwords
+    language="en",             # Language for stopwords
+    include_pos=["NOUN","ADJ","PROPN","VERB"],  # POS tags to include in the graph
+    use_pos_in_nodes=True,     # If True, graph nodes are lemma+POS
+    phrase_grouping="scrubbed_text",   # "lemma" or "scrubbed_text"
+    stopwords=["custom", "terms"]  # Additional stopwords (extends built-in list)
 )
 extractor = BaseTextRank(config=config)

{rapid_textrank-0.1.0 → rapid_textrank-0.1.1}/README.md RENAMED Viewed

@@ -182,12 +182,16 @@ config = TextRankConfig(
     damping=0.85,              # PageRank damping factor (0-1)
     max_iterations=100,        # Maximum PageRank iterations
     convergence_threshold=1e-6,# Convergence threshold
-    window_size=4,             # Co-occurrence window size
+    window_size=3,             # Co-occurrence window size
     top_n=10,                  # Number of results
     min_phrase_length=1,       # Minimum words in a phrase
     max_phrase_length=4,       # Maximum words in a phrase
     score_aggregation="sum",   # How to combine word scores: "sum", "mean", "max", "rms"
-    language="en"              # Language for stopwords
+    language="en",             # Language for stopwords
+    include_pos=["NOUN","ADJ","PROPN","VERB"],  # POS tags to include in the graph
+    use_pos_in_nodes=True,     # If True, graph nodes are lemma+POS
+    phrase_grouping="scrubbed_text",   # "lemma" or "scrubbed_text"
+    stopwords=["custom", "terms"]  # Additional stopwords (extends built-in list)
 )
 extractor = BaseTextRank(config=config)

{rapid_textrank-0.1.0 → rapid_textrank-0.1.1}/notebooks/01_quickstart.ipynb RENAMED Viewed

@@ -193,17 +193,11 @@
    "source": [
     "from rapid_textrank import TextRankConfig, BaseTextRank\n",
     "\n",
-    "# Create a custom configuration\n",
+    "# Create a custom configuration (only overriding a few defaults)\n",
     "config = TextRankConfig(\n",
-    "    damping=0.85,              # PageRank damping factor (0-1)\n",
-    "    max_iterations=100,        # Maximum PageRank iterations\n",
-    "    convergence_threshold=1e-6,# Stop when scores change less than this\n",
-    "    window_size=4,             # Co-occurrence window size\n",
-    "    top_n=10,                  # Number of results\n",
-    "    min_phrase_length=1,       # Minimum words in a phrase\n",
-    "    max_phrase_length=4,       # Maximum words in a phrase\n",
-    "    score_aggregation=\"sum\",   # How to combine word scores: \"sum\", \"mean\", \"max\", \"rms\"\n",
-    "    language=\"en\"              # Language for stopwords\n",
+    "    top_n=10,\n",
+    "    score_aggregation=\"sum\",\n",
+    "    language=\"en\",\n",
     ")\n",
     "\n",
     "# Create an extractor with the config\n",
@@ -214,9 +208,10 @@
     "\n",
     "print(f\"Converged: {result.converged}\")\n",
     "print(f\"Iterations: {result.iterations}\")\n",
-    "print(f\"\\nTop phrases:\")\n",
+    "print(f\"\n",
+    "Top phrases:\")\n",
     "for p in result.phrases[:5]:\n",
-    "    print(f\"  {p.rank}. {p.text}: {p.score:.4f}\")"
+    "    print(f\"  {p.rank}. {p.text}: {p.score:.4f}\")\n"
    ]
   },
   {
@@ -317,7 +312,7 @@
      "output_type": "stream",
      "text": [
       "German keywords:\n",
-      "  1. Teilgebiet der künstlichen Intelligenz: 0.1860\n",
+      "  1. Teilgebiet der k\u00fcnstlichen Intelligenz: 0.1860\n",
       "  2. aus Erfahrung zu lernen: 0.1768\n",
       "  3. Netze mit vielen Schichten: 0.1184\n"
      ]
@@ -326,9 +321,9 @@
    "source": [
     "# German example\n",
     "german_text = \"\"\"\n",
-    "Maschinelles Lernen ist ein Teilgebiet der künstlichen Intelligenz.\n",
+    "Maschinelles Lernen ist ein Teilgebiet der k\u00fcnstlichen Intelligenz.\n",
     "Deep Learning verwendet neuronale Netze mit vielen Schichten.\n",
-    "Diese Technologie ermöglicht es Computern, aus Erfahrung zu lernen.\n",
+    "Diese Technologie erm\u00f6glicht es Computern, aus Erfahrung zu lernen.\n",
     "\"\"\"\n",
     "\n",
     "keywords_de = extract_keywords(german_text, top_n=5, language=\"de\")\n",
@@ -350,7 +345,7 @@
      "text": [
       "French keywords:\n",
       "  1. branche de l'intelligence artificielle: 0.1906\n",
-      "  2. l'analyse de données complexes: 0.1764\n",
+      "  2. l'analyse de donn\u00e9es complexes: 0.1764\n",
       "  3. de nombreux secteurs industriels: 0.1250\n"
      ]
     }
@@ -359,7 +354,7 @@
     "# French example\n",
     "french_text = \"\"\"\n",
     "L'apprentissage automatique est une branche de l'intelligence artificielle.\n",
-    "Les réseaux de neurones profonds permettent l'analyse de données complexes.\n",
+    "Les r\u00e9seaux de neurones profonds permettent l'analyse de donn\u00e9es complexes.\n",
     "Ces technologies transforment de nombreux secteurs industriels.\n",
     "\"\"\"\n",
     "\n",
@@ -395,10 +390,10 @@
       "Note: you may need to restart the kernel to use updated packages.\n",
       "Collecting en-core-web-sm==3.8.0\n",
       "  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.8/12.8 MB\u001b[0m \u001b[31m37.8 MB/s\u001b[0m  \u001b[33m0:00:00\u001b[0meta \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[2K     \u001b[90m\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u001b[0m \u001b[32m12.8/12.8 MB\u001b[0m \u001b[31m37.8 MB/s\u001b[0m  \u001b[33m0:00:00\u001b[0meta \u001b[36m0:00:01\u001b[0m\n",
       "\u001b[?25hInstalling collected packages: en-core-web-sm\n",
       "Successfully installed en-core-web-sm-3.8.0\n",
-      "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
+      "\u001b[38;5;2m\u2714 Download and installation successful\u001b[0m\n",
       "You can now load the package via spacy.load('en_core_web_sm')\n"
      ]
     }
@@ -484,4 +479,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}

{rapid_textrank-0.1.0 → rapid_textrank-0.1.1}/notebooks/02_algorithm_variants.ipynb RENAMED Viewed

@@ -118,7 +118,7 @@
     "\n",
     "Based on [Florescu & Caragea (2017)](https://aclanthology.org/P17-1102/), PositionRank weights words by their position in the document.\n",
     "\n",
-    "**Key insight:** In many documents (papers, news articles, reports), important terms appear early—in titles, abstracts, or introductory paragraphs.\n",
+    "**Key insight:** In many documents (papers, news articles, reports), important terms appear early\u2014in titles, abstracts, or introductory paragraphs.\n",
     "\n",
     "**How it differs from BaseTextRank:**\n",
     "- Words appearing early get higher initial importance\n",
@@ -555,7 +555,7 @@
     "    ],\n",
     "    \"config\": {\n",
     "        \"top_n\": 5,\n",
-    "        \"window_size\": 4,\n",
+    "        \"window_size\": 3,\n",
     "        \"damping\": 0.85\n",
     "    }\n",
     "}\n",
@@ -565,7 +565,7 @@
     "\n",
     "print(\"Single document result:\")\n",
     "for phrase in result[\"phrases\"]:\n",
-    "    print(f\"  {phrase['text']}: {phrase['score']:.4f}\")"
+    "    print(f\"  {phrase['text']}: {phrase['score']:.4f}\")\n"
    ]
   },
   {
@@ -639,27 +639,27 @@
     "\n",
     "```\n",
     "                                START\n",
-    "                                  │\n",
-    "                                  ▼\n",
-    "                    ┌─────────────────────────┐\n",
-    "                    │ Do you have specific    │\n",
-    "                    │ topics to focus on?     │\n",
-    "                    └─────────────────────────┘\n",
-    "                         │              │\n",
+    "                                  \u2502\n",
+    "                                  \u25bc\n",
+    "                    \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n",
+    "                    \u2502 Do you have specific    \u2502\n",
+    "                    \u2502 topics to focus on?     \u2502\n",
+    "                    \u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n",
+    "                         \u2502              \u2502\n",
     "                        YES             NO\n",
-    "                         │              │\n",
-    "                         ▼              ▼\n",
-    "               ┌──────────────┐  ┌─────────────────────────┐\n",
-    "               │ BiasedTextRank│  │ Is key info at the     │\n",
-    "               │              │  │ beginning of the doc?   │\n",
-    "               └──────────────┘  └─────────────────────────┘\n",
-    "                                       │              │\n",
+    "                         \u2502              \u2502\n",
+    "                         \u25bc              \u25bc\n",
+    "               \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510  \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n",
+    "               \u2502 BiasedTextRank\u2502  \u2502 Is key info at the     \u2502\n",
+    "               \u2502              \u2502  \u2502 beginning of the doc?   \u2502\n",
+    "               \u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518  \u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n",
+    "                                       \u2502              \u2502\n",
     "                                      YES             NO\n",
-    "                                       │              │\n",
-    "                                       ▼              ▼\n",
-    "                              ┌──────────────┐ ┌──────────────┐\n",
-    "                              │ PositionRank │ │ BaseTextRank │\n",
-    "                              └──────────────┘ └──────────────┘\n",
+    "                                       \u2502              \u2502\n",
+    "                                       \u25bc              \u25bc\n",
+    "                              \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510 \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n",
+    "                              \u2502 PositionRank \u2502 \u2502 BaseTextRank \u2502\n",
+    "                              \u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518 \u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n",
     "```\n",
     "\n",
     "### Recommendations by Document Type\n",
@@ -707,4 +707,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}

{rapid_textrank-0.1.0 → rapid_textrank-0.1.1}/notebooks/03_explain_algorithm.ipynb RENAMED Viewed

@@ -179,7 +179,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def build_cooccurrence_graph(tokens, window_size=4):\n",
+    "def build_cooccurrence_graph(tokens, window_size=3):\n",
     "    \"\"\"Build a co-occurrence graph from tokens.\"\"\"\n",
     "    G = nx.Graph()\n",
     "    \n",
@@ -205,11 +205,11 @@
     "    return G\n",
     "\n",
     "# Build graph with default window size\n",
-    "G = build_cooccurrence_graph(tokens, window_size=4)\n",
+    "G = build_cooccurrence_graph(tokens, window_size=3)\n",
     "\n",
     "print(f\"Graph Statistics:\")\n",
     "print(f\"  Nodes: {G.number_of_nodes()}\")\n",
-    "print(f\"  Edges: {G.number_of_edges()}\")"
+    "print(f\"  Edges: {G.number_of_edges()}\")\n"
    ]
   },
   {
@@ -241,10 +241,10 @@
     "edge_labels = {(u, v): G[u][v]['weight'] for u, v in G.edges()}\n",
     "nx.draw_networkx_edge_labels(G, pos, edge_labels, font_size=8, ax=ax)\n",
     "\n",
-    "ax.set_title(\"Co-occurrence Graph (window_size=4)\", fontsize=14, fontweight='bold')\n",
+    "ax.set_title(\"Co-occurrence Graph (window_size=3)\", fontsize=14, fontweight='bold')\n",
     "ax.axis('off')\n",
     "plt.tight_layout()\n",
-    "plt.show()"
+    "plt.show()\n"
    ]
   },
   {
@@ -345,13 +345,13 @@
     "    return scores, history\n",
     "\n",
     "# Run PageRank\n",
-    "G = build_cooccurrence_graph(tokens, window_size=4)\n",
+    "G = build_cooccurrence_graph(tokens, window_size=3)\n",
     "scores, history = pagerank_with_history(G)\n",
     "\n",
     "print(f\"PageRank converged in {len(history)-1} iterations\")\n",
     "print(f\"\\nFinal scores (sorted by importance):\")\n",
     "for word, score in sorted(scores.items(), key=lambda x: -x[1]):\n",
-    "    print(f\"  {word:<15} {score:.4f}\")"
+    "    print(f\"  {word:<15} {score:.4f}\")\n"
    ]
   },
   {
@@ -391,7 +391,7 @@
     "# Visualize graph with node size proportional to score\n",
     "fig, ax = plt.subplots(figsize=(12, 8))\n",
     "\n",
-    "G = build_cooccurrence_graph(tokens, window_size=4)\n",
+    "G = build_cooccurrence_graph(tokens, window_size=3)\n",
     "pos = nx.spring_layout(G, k=2, iterations=50, seed=42)\n",
     "\n",
     "# Node sizes based on PageRank scores\n",
@@ -414,7 +414,7 @@
     "             fontsize=14, fontweight='bold')\n",
     "ax.axis('off')\n",
     "plt.tight_layout()\n",
-    "plt.show()"
+    "plt.show()\n"
    ]
   },
   {

{rapid_textrank-0.1.0 → rapid_textrank-0.1.1}/notebooks/04_benchmarks.ipynb RENAMED Viewed

@@ -27,7 +27,7 @@
      "output_type": "stream",
      "text": [
       "Note: you may need to restart the kernel to use updated packages.\n",
-      "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
+      "\u001b[38;5;2m\u2714 Download and installation successful\u001b[0m\n",
       "You can now load the package via spacy.load('en_core_web_sm')\n"
      ]
     }
@@ -240,20 +240,20 @@
       "\n",
       "SMALL TEXT (~16 words)\n",
       "--------------------------------------------------\n",
-      "rapid_textrank:      4.44 ms (±1.38)\n",
-      "pytextrank:          7.54 ms (±1.00)\n",
+      "rapid_textrank:      4.44 ms (\u00b11.38)\n",
+      "pytextrank:          7.54 ms (\u00b11.00)\n",
       "Speedup:              1.7x faster\n",
       "\n",
       "MEDIUM TEXT (~100 words)\n",
       "--------------------------------------------------\n",
-      "rapid_textrank:      3.08 ms (±0.32)\n",
-      "pytextrank:         66.14 ms (±104.73)\n",
+      "rapid_textrank:      3.08 ms (\u00b10.32)\n",
+      "pytextrank:         66.14 ms (\u00b1104.73)\n",
       "Speedup:             21.4x faster\n",
       "\n",
       "LARGE TEXT (~660 words)\n",
       "--------------------------------------------------\n",
-      "rapid_textrank:      4.12 ms (±0.83)\n",
-      "pytextrank:        184.31 ms (±26.20)\n",
+      "rapid_textrank:      4.12 ms (\u00b10.83)\n",
+      "pytextrank:        184.31 ms (\u00b126.20)\n",
       "Speedup:             44.7x faster\n"
      ]
     }
@@ -272,11 +272,11 @@
     "\n",
     "    # Benchmark rapid_textrank\n",
     "    rust_results = benchmark_rapid_textrank(text)\n",
-    "    print(f\"rapid_textrank:  {rust_results['mean']:>8.2f} ms (±{rust_results['std']:.2f})\")\n",
+    "    print(f\"rapid_textrank:  {rust_results['mean']:>8.2f} ms (\u00b1{rust_results['std']:.2f})\")\n",
     "\n",
     "    # Benchmark pytextrank\n",
     "    py_results = benchmark_pytextrank(text)\n",
-    "    print(f\"pytextrank:      {py_results['mean']:>8.2f} ms (±{py_results['std']:.2f})\")\n",
+    "    print(f\"pytextrank:      {py_results['mean']:>8.2f} ms (\u00b1{py_results['std']:.2f})\")\n",
     "\n",
     "    speedup = py_results['mean'] / rust_results['mean']\n",
     "    print(f\"Speedup:         {speedup:>8.1f}x faster\")\n",
@@ -496,20 +496,20 @@
       "\n",
       "SMALL TEXT (21 tokens)\n",
       "--------------------------------------------------\n",
-      "rapid_textrank (JSON API):     0.037 ms (±0.010)\n",
-      "pytextrank (extraction):       1.596 ms (±0.582)\n",
+      "rapid_textrank (JSON API):     0.037 ms (\u00b10.010)\n",
+      "pytextrank (extraction):       1.596 ms (\u00b10.582)\n",
       "Speedup:                        42.9x faster\n",
       "\n",
       "MEDIUM TEXT (138 tokens)\n",
       "--------------------------------------------------\n",
-      "rapid_textrank (JSON API):     0.231 ms (±0.101)\n",
-      "pytextrank (extraction):       2.881 ms (±0.472)\n",
+      "rapid_textrank (JSON API):     0.231 ms (\u00b10.101)\n",
+      "pytextrank (extraction):       2.881 ms (\u00b10.472)\n",
       "Speedup:                        12.5x faster\n",
       "\n",
       "LARGE TEXT (838 tokens)\n",
       "--------------------------------------------------\n",
-      "rapid_textrank (JSON API):     0.869 ms (±0.068)\n",
-      "pytextrank (extraction):       9.171 ms (±0.891)\n",
+      "rapid_textrank (JSON API):     0.869 ms (\u00b10.068)\n",
+      "pytextrank (extraction):       9.171 ms (\u00b10.891)\n",
       "Speedup:                        10.5x faster\n"
      ]
     }
@@ -529,10 +529,10 @@
     "    print(\"-\" * 50)\n",
     "    \n",
     "    rapid_res = benchmark_rapid_extraction_only(tokens)\n",
-    "    print(f\"rapid_textrank (JSON API):  {rapid_res['mean']:>8.3f} ms (±{rapid_res['std']:.3f})\")\n",
+    "    print(f\"rapid_textrank (JSON API):  {rapid_res['mean']:>8.3f} ms (\u00b1{rapid_res['std']:.3f})\")\n",
     "    \n",
     "    py_res = benchmark_pytextrank_extraction_only(doc)\n",
-    "    print(f\"pytextrank (extraction):    {py_res['mean']:>8.3f} ms (±{py_res['std']:.3f})\")\n",
+    "    print(f\"pytextrank (extraction):    {py_res['mean']:>8.3f} ms (\u00b1{py_res['std']:.3f})\")\n",
     "    \n",
     "    speedup = py_res['mean'] / rapid_res['mean'] if rapid_res['mean'] > 0 else float('inf')\n",
     "    print(f\"Speedup:                    {speedup:>8.1f}x faster\")\n",
@@ -647,9 +647,9 @@
      "text": [
       "Benchmarking batch processing (100 documents)...\n",
       "============================================================\n",
-      "rapid_textrank (batch JSON):      18.50 ms (±0.37)\n",
+      "rapid_textrank (batch JSON):      18.50 ms (\u00b10.37)\n",
       "  Per document:                   0.185 ms\n",
-      "pytextrank (sequential):        3431.75 ms (±849.32)\n",
+      "pytextrank (sequential):        3431.75 ms (\u00b1849.32)\n",
       "  Per document:                  34.317 ms\n",
       "\n",
       "Speedup: 185.5x faster\n"
@@ -697,11 +697,11 @@
     "print(\"=\" * 60)\n",
     "\n",
     "rapid_mean, rapid_std = benchmark_rapid_batch(batch_tokens)\n",
-    "print(f\"rapid_textrank (batch JSON): {rapid_mean:>10.2f} ms (±{rapid_std:.2f})\")\n",
+    "print(f\"rapid_textrank (batch JSON): {rapid_mean:>10.2f} ms (\u00b1{rapid_std:.2f})\")\n",
     "print(f\"  Per document:              {rapid_mean/num_docs:>10.3f} ms\")\n",
     "\n",
     "py_mean, py_std = benchmark_pytextrank_batch(batch_texts)\n",
-    "print(f\"pytextrank (sequential):     {py_mean:>10.2f} ms (±{py_std:.2f})\")\n",
+    "print(f\"pytextrank (sequential):     {py_mean:>10.2f} ms (\u00b1{py_std:.2f})\")\n",
     "print(f\"  Per document:              {py_mean/num_docs:>10.3f} ms\")\n",
     "\n",
     "speedup = py_mean / rapid_mean\n",
@@ -978,4 +978,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}

{rapid_textrank-0.1.0 → rapid_textrank-0.1.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "maturin"
 [project]
 name = "rapid_textrank"
-version = "0.1.0"
+version = "0.1.1"
 description = "High-performance TextRank implementation with Python bindings"
 readme = { file = "README.md", content-type = "text/markdown" }
 license = "MIT"

{rapid_textrank-0.1.0 → rapid_textrank-0.1.1}/python/rapid_textrank/spacy_component.py RENAMED Viewed

@@ -79,11 +79,16 @@ if SPACY_AVAILABLE:
             "damping": 0.85,
             "max_iterations": 100,
             "convergence_threshold": 1e-6,
-            "window_size": 4,
+            "window_size": 3,
             "top_n": 10,
             "min_phrase_length": 1,
             "max_phrase_length": 4,
             "score_aggregation": "sum",
+            "include_pos": ["ADJ", "NOUN", "PROPN", "VERB"],
+            "use_pos_in_nodes": True,
+            "phrase_grouping": "scrubbed_text",
+            "language": "en",
+            "stopwords": None,
         },
     )
     def create_rapid_textrank(
@@ -97,6 +102,11 @@ if SPACY_AVAILABLE:
         min_phrase_length: int,
         max_phrase_length: int,
         score_aggregation: str,
+        include_pos: Optional[List[str]],
+        use_pos_in_nodes: bool,
+        phrase_grouping: str,
+        language: str,
+        stopwords: Optional[List[str]],
     ):
         """Create a RustTextRank pipeline component."""
         return RustTextRank(
@@ -110,6 +120,11 @@ if SPACY_AVAILABLE:
             min_phrase_length=min_phrase_length,
             max_phrase_length=max_phrase_length,
             score_aggregation=score_aggregation,
+            include_pos=include_pos,
+            use_pos_in_nodes=use_pos_in_nodes,
+            phrase_grouping=phrase_grouping,
+            language=language,
+            stopwords=stopwords,
         )
     class RustTextRank:
@@ -135,11 +150,16 @@ if SPACY_AVAILABLE:
             damping: float = 0.85,
             max_iterations: int = 100,
             convergence_threshold: float = 1e-6,
-            window_size: int = 4,
+            window_size: int = 3,
             top_n: int = 10,
             min_phrase_length: int = 1,
             max_phrase_length: int = 4,
             score_aggregation: str = "sum",
+            include_pos: Optional[List[str]] = None,
+            use_pos_in_nodes: bool = True,
+            phrase_grouping: str = "scrubbed_text",
+            language: str = "en",
+            stopwords: Optional[List[str]] = None,
         ):
             self.nlp = nlp
             self.name = name
@@ -152,7 +172,14 @@ if SPACY_AVAILABLE:
                 "min_phrase_length": min_phrase_length,
                 "max_phrase_length": max_phrase_length,
                 "score_aggregation": score_aggregation,
+                "use_pos_in_nodes": use_pos_in_nodes,
+                "phrase_grouping": phrase_grouping,
+                "language": language,
             }
+            if include_pos is not None:
+                self.config["include_pos"] = include_pos
+            if stopwords is not None:
+                self.config["stopwords"] = stopwords
             # Register custom extensions
             if not Doc.has_extension("phrases"):

rapid-textrank 0.1.0__tar.gz → 0.1.1__tar.gz

rapid-textrank 0.1.0__tar.gz → 0.1.1__tar.gz