pycorpdiff 0.1.0a1__tar.gz → 0.1.0a3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/CHANGELOG.md +1 -1
  2. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/CITATION.cff +1 -1
  3. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/PKG-INFO +47 -33
  4. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/README.md +44 -30
  5. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/pyproject.toml +3 -3
  6. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/__init__.py +2 -2
  7. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/.gitignore +0 -0
  8. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/LICENSE +0 -0
  9. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/_backends/__init__.py +0 -0
  10. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/_backends/pandas.py +0 -0
  11. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/_backends/polars.py +0 -0
  12. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/collocation/__init__.py +0 -0
  13. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/collocation/cooccurrence.py +0 -0
  14. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/collocation/measures.py +0 -0
  15. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/collocation/network.py +0 -0
  16. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/collocation/shift.py +0 -0
  17. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/compare.py +0 -0
  18. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/corpus.py +0 -0
  19. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/datasets/__init__.py +0 -0
  20. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/datasets/_data/hansard_sample.parquet +0 -0
  21. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/datasets/_generate_hansard.py +0 -0
  22. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/datasets/hansard.py +0 -0
  23. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/datasets/histwords.py +0 -0
  24. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/explain.py +0 -0
  25. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/io/__init__.py +0 -0
  26. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/io/duckdb.py +0 -0
  27. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/io/huggingface.py +0 -0
  28. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/io/readers.py +0 -0
  29. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/keyness/__init__.py +0 -0
  30. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/keyness/bayes.py +0 -0
  31. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/keyness/chi_squared.py +0 -0
  32. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/keyness/correction.py +0 -0
  33. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/keyness/dispersion.py +0 -0
  34. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/keyness/effect_sizes.py +0 -0
  35. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/keyness/loglikelihood.py +0 -0
  36. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/keyness/multicorpus.py +0 -0
  37. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/keyness/permutation.py +0 -0
  38. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/py.typed +0 -0
  39. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/results.py +0 -0
  40. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/semantic/__init__.py +0 -0
  41. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/semantic/alignment.py +0 -0
  42. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/semantic/embed.py +0 -0
  43. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/semantic/shift.py +0 -0
  44. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/semantic/trajectory.py +0 -0
  45. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/stats.py +0 -0
  46. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/temporal/__init__.py +0 -0
  47. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/temporal/bocpd.py +0 -0
  48. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/temporal/causal_impact.py +0 -0
  49. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/temporal/changepoint.py +0 -0
  50. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/temporal/forecast.py +0 -0
  51. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/temporal/its.py +0 -0
  52. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/temporal/slicing.py +0 -0
  53. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/tokenize.py +0 -0
  54. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/viz/__init__.py +0 -0
  55. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/viz/bocpd.py +0 -0
  56. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/viz/causal_impact.py +0 -0
  57. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/viz/collocation.py +0 -0
  58. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/viz/dispersion.py +0 -0
  59. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/viz/forecast.py +0 -0
  60. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/viz/keyness.py +0 -0
  61. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/viz/network.py +0 -0
  62. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/viz/scattertext.py +0 -0
  63. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/viz/semantic_forecast.py +0 -0
  64. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/src/pycorpdiff/viz/trajectory.py +0 -0
  65. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/__init__.py +0 -0
  66. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/conftest.py +0 -0
  67. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/fixtures/__init__.py +0 -0
  68. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/integration/__init__.py +0 -0
  69. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/integration/test_collocation_integration.py +0 -0
  70. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/integration/test_crossval_histwords.py +0 -0
  71. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/integration/test_crossval_nltk.py +0 -0
  72. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/integration/test_crossval_quanteda.py +0 -0
  73. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/integration/test_crossval_rayson.py +0 -0
  74. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/integration/test_crossval_scattertext.py +0 -0
  75. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/integration/test_explain_integration.py +0 -0
  76. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/integration/test_keyness_integration.py +0 -0
  77. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/integration/test_sbert_slow.py +0 -0
  78. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/integration/test_semantic_integration.py +0 -0
  79. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/integration/test_stop_words.py +0 -0
  80. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/integration/test_temporal_stats.py +0 -0
  81. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/integration/test_viz.py +0 -0
  82. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/property/__init__.py +0 -0
  83. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/property/test_collocation_properties.py +0 -0
  84. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/property/test_keyness_properties.py +0 -0
  85. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/property/test_temporal_properties.py +0 -0
  86. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/__init__.py +0 -0
  87. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_bayes_factor.py +0 -0
  88. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_bocpd.py +0 -0
  89. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_causal_impact.py +0 -0
  90. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_changepoint.py +0 -0
  91. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_chi_squared.py +0 -0
  92. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_collocation_cooccurrence.py +0 -0
  93. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_collocation_measures.py +0 -0
  94. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_collocation_shift.py +0 -0
  95. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_comparison_concordance.py +0 -0
  96. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_cooccurrence_network.py +0 -0
  97. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_corpus_hash.py +0 -0
  98. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_corpus_vocab.py +0 -0
  99. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_correction.py +0 -0
  100. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_datasets_hansard.py +0 -0
  101. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_dispersion.py +0 -0
  102. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_dispersion_plot.py +0 -0
  103. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_doc_term_counts_sparse.py +0 -0
  104. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_effect_sizes.py +0 -0
  105. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_embedders.py +0 -0
  106. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_explain.py +0 -0
  107. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_forecast.py +0 -0
  108. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_forecast_semantic_drift.py +0 -0
  109. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_from_huggingface.py +0 -0
  110. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_hansard_fetcher.py +0 -0
  111. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_histwords_loader.py +0 -0
  112. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_its.py +0 -0
  113. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_keyness_multi.py +0 -0
  114. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_loglikelihood.py +0 -0
  115. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_ngram_tokenizer.py +0 -0
  116. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_permutation_keyness.py +0 -0
  117. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_polars_interop.py +0 -0
  118. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_procrustes.py +0 -0
  119. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_read_duckdb.py +0 -0
  120. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_read_txt_line_mode.py +0 -0
  121. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_result_exports.py +0 -0
  122. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_scattertext_plot.py +0 -0
  123. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_semantic_neighbours.py +0 -0
  124. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_semantic_shift.py +0 -0
  125. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_semantic_trajectory.py +0 -0
  126. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_smoke.py +0 -0
  127. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_temporal.py +0 -0
  128. {pycorpdiff-0.1.0a1 → pycorpdiff-0.1.0a3}/tests/unit/test_wilson_ci.py +0 -0
@@ -4,7 +4,7 @@ All notable changes to `pycorpdiff` are documented in this file. The format
4
4
  follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this
5
5
  project adheres to [Semantic Versioning](https://semver.org/).
6
6
 
7
- ## [0.1.0a1] — initial release
7
+ ## [0.1.0a3] — initial release
8
8
 
9
9
  The initial public release of `pycorpdiff` — comparative corpus analysis
10
10
  for modern Python workflows. Three public verbs (`compare`, `track`,
@@ -4,7 +4,7 @@ message: >
4
4
  entry. GitHub renders a "Cite this repository" widget directly from
5
5
  this file.
6
6
  title: "pycorpdiff: Comparative Corpus Analysis for Modern Python Workflows"
7
- version: 0.1.0a1
7
+ version: 0.1.0a3
8
8
  date-released: 2026-05-22
9
9
  authors:
10
10
  - family-names: Turner
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pycorpdiff
3
- Version: 0.1.0a1
3
+ Version: 0.1.0a3
4
4
  Summary: Comparative corpus analysis for Python: keyness, collocations, semantic shift, temporal trajectories with changepoints + causal inference.
5
5
  Project-URL: Homepage, https://github.com/jturner-uofl/pycorpdiff
6
6
  Project-URL: Documentation, https://github.com/jturner-uofl/pycorpdiff
@@ -53,7 +53,7 @@ Requires-Dist: matplotlib>=3.8; extra == 'all'
53
53
  Requires-Dist: networkx>=3.1; extra == 'all'
54
54
  Requires-Dist: polars>=1.0; extra == 'all'
55
55
  Requires-Dist: pyarrow>=15; extra == 'all'
56
- Requires-Dist: pysofra>=0.1.0a2; extra == 'all'
56
+ Requires-Dist: pysofra>=0.1.0a3; extra == 'all'
57
57
  Requires-Dist: ruptures>=1.1; extra == 'all'
58
58
  Requires-Dist: scikit-learn>=1.3; extra == 'all'
59
59
  Requires-Dist: sentence-transformers>=2.2; extra == 'all'
@@ -76,7 +76,7 @@ Provides-Extra: nlp
76
76
  Requires-Dist: spacy>=3.7; extra == 'nlp'
77
77
  Provides-Extra: notebooks
78
78
  Requires-Dist: jupyter>=1.0; extra == 'notebooks'
79
- Requires-Dist: pysofra>=0.1.0a2; extra == 'notebooks'
79
+ Requires-Dist: pysofra>=0.1.0a3; extra == 'notebooks'
80
80
  Requires-Dist: vl-convert-python>=1.5; extra == 'notebooks'
81
81
  Provides-Extra: polars
82
82
  Requires-Dist: polars>=1.0; extra == 'polars'
@@ -130,7 +130,7 @@ points — one-line adapters, no plugin registry. The base install pulls
130
130
  only `numpy`, `pandas`, `scipy`, and `pyarrow`; everything else is opt-in
131
131
  via extras.
132
132
 
133
- > **Status: alpha (0.1.0a1).** Public API is stable for the features
133
+ > **Status: alpha (0.1.0a3).** Public API is stable for the features
134
134
  > described below; on PyPI as `pip install pycorpdiff`.
135
135
 
136
136
  ## The three-layer architecture
@@ -143,48 +143,62 @@ via extras.
143
143
 
144
144
  ## Quick start
145
145
 
146
+ ```bash
147
+ pip install "pycorpdiff[viz]"
148
+ ```
149
+
146
150
  ```python
147
151
  import pycorpdiff as pcd
148
152
 
149
- # Bundled synthetic UK-Hansard corpus — runs offline, no data needed.
153
+ # Bundled synthetic UK-Hansard sample — runs offline, no data download.
150
154
  corpus = pcd.load_hansard_sample()
151
155
  immigration = corpus.slice(topic="immigration")
152
- human = immigration.slice(frame="humanising")
153
- criminal = immigration.slice(frame="criminalising")
154
-
155
- # Compare — three verbs
156
- k = pcd.compare(human, criminal).keyness()
157
- c = pcd.compare(human, criminal).collocation_shift("immigrant")
158
- # s = pcd.compare(human, criminal).semantic_shift("immigrant", embedder=pcd.SBERTEmbedder())
159
- # requires `pip install "pycorpdiff[semantic]"`
160
-
161
- # Track over time
162
- tr = pcd.track(immigration, "criminal").over_time(freq="Y")
163
- tr.changepoints() # offline PELT
164
- tr.changepoints_online(hazard=1/24) # Bayesian online (Adams & MacKay 2007)
165
- tr.interrupted_time_series(event_date="2016") # segmented OLS
166
- tr.causal_impact(event_date="2016") # Bayesian counterfactual (Brodersen 2015)
167
- tr.forecast(horizon=4) # state-space ETS
156
+
157
+ # Which words separate the humanising and criminalising frames?
158
+ keyness = pcd.compare(
159
+ immigration.slice(frame="humanising"),
160
+ immigration.slice(frame="criminalising"),
161
+ ).keyness(min_count=3)
162
+
163
+ keyness.plot() # volcano plot — picture the result
164
+ # keyness.table.head(10) # or look at the ranked table directly
165
+ # keyness.explain("criminal") # KWIC concordances showing the textual evidence
166
+ ```
167
+
168
+ That's the core workflow in a few lines: load a corpus, slice it,
169
+ compare two slices, plot the result. Every other analytical method —
170
+ collocation shifts, semantic drift, temporal trajectories, changepoint
171
+ detection, causal-impact analysis, forecasting, co-occurrence networks,
172
+ N-way keyness — follows the same shape. See
173
+ [the showcase notebook](docs/rendered/pycorpdiff_showcase.html) for the
174
+ full feature tour, or the cheat sheet below for one-line API previews.
175
+
176
+ ### Cheat sheet — every analytical surface in one block
177
+
178
+ ```python
179
+ # Compare verbs (returns Result objects with .plot / .to_df / .explain / .summary)
180
+ pcd.compare(a, b).keyness()
181
+ pcd.compare(a, b).collocation_shift("migrant")
182
+ pcd.compare(a, b).semantic_shift("migrant", embedder=pcd.SBERTEmbedder()) # [semantic]
183
+
184
+ # Track over time (requires [temporal] for the changepoint + ITS + forecast + causal_impact methods)
185
+ tr = pcd.track(corpus, "migrant").over_time(freq="Y")
186
+ tr.changepoints() # offline PELT
187
+ tr.changepoints_online(hazard=1/24) # Bayesian online (Adams & MacKay 2007)
188
+ tr.interrupted_time_series(event_date="2016") # segmented OLS
189
+ tr.causal_impact(event_date="2016") # Bayesian counterfactual (Brodersen 2015)
190
+ tr.forecast(horizon=4) # state-space ETS
168
191
 
169
192
  # Before / after a known event
170
193
  pcd.compare.before_after(corpus, event_date="2016-06-23").keyness()
171
194
 
172
- # N-way (≥ 2 corpora) — one keyness across all four parties
173
- parties = ["Conservative", "Labour", "Liberal Democrat", "SNP"]
174
- nhs = corpus.slice(topic="nhs")
175
- pcd.keyness_multi([nhs.slice(party=p) for p in parties], labels=parties)
195
+ # N-way (≥ 2 corpora)
196
+ pcd.keyness_multi([a, b, c, d], labels=["A", "B", "C", "D"])
176
197
 
177
198
  # The discourse as a graph
178
- pcd.cooccurrence_network(immigration, top_n=30).plot()
179
-
180
- # Every Result: .to_df() · .plot() · .explain() · .summary() · .to_html() · .to_json()
199
+ pcd.cooccurrence_network(corpus, top_n=30).plot()
181
200
  ```
182
201
 
183
- The snippet above runs as-is on a fresh `pip install pycorpdiff` — no data
184
- download required. Replace `load_hansard_sample()` with `pcd.from_dataframe(your_df, ...)`,
185
- `pcd.read_parquet(...)`, `pcd.fetch_hansard(...)`, or `pcd.from_huggingface(...)`
186
- to use your own corpus.
187
-
188
202
  See [`examples/pycorpdiff_showcase.ipynb`](examples/pycorpdiff_showcase.ipynb)
189
203
  ([rendered HTML](docs/rendered/pycorpdiff_showcase.html)) for a
190
204
  walkthrough on a synthetic UK Hansard corpus exercising every analytical
@@ -35,7 +35,7 @@ points — one-line adapters, no plugin registry. The base install pulls
35
35
  only `numpy`, `pandas`, `scipy`, and `pyarrow`; everything else is opt-in
36
36
  via extras.
37
37
 
38
- > **Status: alpha (0.1.0a1).** Public API is stable for the features
38
+ > **Status: alpha (0.1.0a3).** Public API is stable for the features
39
39
  > described below; on PyPI as `pip install pycorpdiff`.
40
40
 
41
41
  ## The three-layer architecture
@@ -48,48 +48,62 @@ via extras.
48
48
 
49
49
  ## Quick start
50
50
 
51
+ ```bash
52
+ pip install "pycorpdiff[viz]"
53
+ ```
54
+
51
55
  ```python
52
56
  import pycorpdiff as pcd
53
57
 
54
- # Bundled synthetic UK-Hansard corpus — runs offline, no data needed.
58
+ # Bundled synthetic UK-Hansard sample — runs offline, no data download.
55
59
  corpus = pcd.load_hansard_sample()
56
60
  immigration = corpus.slice(topic="immigration")
57
- human = immigration.slice(frame="humanising")
58
- criminal = immigration.slice(frame="criminalising")
59
-
60
- # Compare — three verbs
61
- k = pcd.compare(human, criminal).keyness()
62
- c = pcd.compare(human, criminal).collocation_shift("immigrant")
63
- # s = pcd.compare(human, criminal).semantic_shift("immigrant", embedder=pcd.SBERTEmbedder())
64
- # requires `pip install "pycorpdiff[semantic]"`
65
-
66
- # Track over time
67
- tr = pcd.track(immigration, "criminal").over_time(freq="Y")
68
- tr.changepoints() # offline PELT
69
- tr.changepoints_online(hazard=1/24) # Bayesian online (Adams & MacKay 2007)
70
- tr.interrupted_time_series(event_date="2016") # segmented OLS
71
- tr.causal_impact(event_date="2016") # Bayesian counterfactual (Brodersen 2015)
72
- tr.forecast(horizon=4) # state-space ETS
61
+
62
+ # Which words separate the humanising and criminalising frames?
63
+ keyness = pcd.compare(
64
+ immigration.slice(frame="humanising"),
65
+ immigration.slice(frame="criminalising"),
66
+ ).keyness(min_count=3)
67
+
68
+ keyness.plot() # volcano plot — picture the result
69
+ # keyness.table.head(10) # or look at the ranked table directly
70
+ # keyness.explain("criminal") # KWIC concordances showing the textual evidence
71
+ ```
72
+
73
+ That's the core workflow in a few lines: load a corpus, slice it,
74
+ compare two slices, plot the result. Every other analytical method —
75
+ collocation shifts, semantic drift, temporal trajectories, changepoint
76
+ detection, causal-impact analysis, forecasting, co-occurrence networks,
77
+ N-way keyness — follows the same shape. See
78
+ [the showcase notebook](docs/rendered/pycorpdiff_showcase.html) for the
79
+ full feature tour, or the cheat sheet below for one-line API previews.
80
+
81
+ ### Cheat sheet — every analytical surface in one block
82
+
83
+ ```python
84
+ # Compare verbs (returns Result objects with .plot / .to_df / .explain / .summary)
85
+ pcd.compare(a, b).keyness()
86
+ pcd.compare(a, b).collocation_shift("migrant")
87
+ pcd.compare(a, b).semantic_shift("migrant", embedder=pcd.SBERTEmbedder()) # [semantic]
88
+
89
+ # Track over time (requires [temporal] for the changepoint + ITS + forecast + causal_impact methods)
90
+ tr = pcd.track(corpus, "migrant").over_time(freq="Y")
91
+ tr.changepoints() # offline PELT
92
+ tr.changepoints_online(hazard=1/24) # Bayesian online (Adams & MacKay 2007)
93
+ tr.interrupted_time_series(event_date="2016") # segmented OLS
94
+ tr.causal_impact(event_date="2016") # Bayesian counterfactual (Brodersen 2015)
95
+ tr.forecast(horizon=4) # state-space ETS
73
96
 
74
97
  # Before / after a known event
75
98
  pcd.compare.before_after(corpus, event_date="2016-06-23").keyness()
76
99
 
77
- # N-way (≥ 2 corpora) — one keyness across all four parties
78
- parties = ["Conservative", "Labour", "Liberal Democrat", "SNP"]
79
- nhs = corpus.slice(topic="nhs")
80
- pcd.keyness_multi([nhs.slice(party=p) for p in parties], labels=parties)
100
+ # N-way (≥ 2 corpora)
101
+ pcd.keyness_multi([a, b, c, d], labels=["A", "B", "C", "D"])
81
102
 
82
103
  # The discourse as a graph
83
- pcd.cooccurrence_network(immigration, top_n=30).plot()
84
-
85
- # Every Result: .to_df() · .plot() · .explain() · .summary() · .to_html() · .to_json()
104
+ pcd.cooccurrence_network(corpus, top_n=30).plot()
86
105
  ```
87
106
 
88
- The snippet above runs as-is on a fresh `pip install pycorpdiff` — no data
89
- download required. Replace `load_hansard_sample()` with `pcd.from_dataframe(your_df, ...)`,
90
- `pcd.read_parquet(...)`, `pcd.fetch_hansard(...)`, or `pcd.from_huggingface(...)`
91
- to use your own corpus.
92
-
93
107
  See [`examples/pycorpdiff_showcase.ipynb`](examples/pycorpdiff_showcase.ipynb)
94
108
  ([rendered HTML](docs/rendered/pycorpdiff_showcase.html)) for a
95
109
  walkthrough on a synthetic UK Hansard corpus exercising every analytical
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "pycorpdiff"
7
- version = "0.1.0a1"
7
+ version = "0.1.0a3"
8
8
  description = "Comparative corpus analysis for Python: keyness, collocations, semantic shift, temporal trajectories with changepoints + causal inference."
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -66,7 +66,7 @@ huggingface = ["datasets>=2.14"]
66
66
  # rendered HTML examples. `jupyter` runs the notebook, `vl-convert` does
67
67
  # static SVG/PNG export of altair charts, `pysofra` renders the showcase's
68
68
  # result tables in JAMA-style typography.
69
- notebooks = ["jupyter>=1.0", "vl-convert-python>=1.5", "pysofra>=0.1.0a2"]
69
+ notebooks = ["jupyter>=1.0", "vl-convert-python>=1.5", "pysofra>=0.1.0a3"]
70
70
  # Meta-extra so `pycorpdiff[all]` exercises every optional code path.
71
71
  all = [
72
72
  "altair>=5",
@@ -82,7 +82,7 @@ all = [
82
82
  "duckdb>=0.10",
83
83
  "spacy>=3.7",
84
84
  "vl-convert-python>=1.5",
85
- "pysofra>=0.1.0a2",
85
+ "pysofra>=0.1.0a3",
86
86
  ]
87
87
  dev = [
88
88
  "pytest>=8",
@@ -14,12 +14,12 @@ Example
14
14
 
15
15
  >>> import pycorpdiff as pcd
16
16
  >>> pcd.__version__
17
- '0.1.0a1'
17
+ '0.1.0a3'
18
18
  """
19
19
 
20
20
  from __future__ import annotations
21
21
 
22
- __version__ = "0.1.0a1"
22
+ __version__ = "0.1.0a3"
23
23
 
24
24
  from .collocation.network import NetworkResult, cooccurrence_network
25
25
  from .compare import Comparison, compare
File without changes
File without changes