pycorpdiff 0.1.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. pycorpdiff/__init__.py +126 -0
  2. pycorpdiff/_backends/__init__.py +3 -0
  3. pycorpdiff/_backends/pandas.py +3 -0
  4. pycorpdiff/_backends/polars.py +3 -0
  5. pycorpdiff/collocation/__init__.py +19 -0
  6. pycorpdiff/collocation/cooccurrence.py +65 -0
  7. pycorpdiff/collocation/measures.py +102 -0
  8. pycorpdiff/collocation/network.py +233 -0
  9. pycorpdiff/collocation/shift.py +146 -0
  10. pycorpdiff/compare.py +345 -0
  11. pycorpdiff/corpus.py +411 -0
  12. pycorpdiff/datasets/__init__.py +27 -0
  13. pycorpdiff/datasets/_data/hansard_sample.parquet +0 -0
  14. pycorpdiff/datasets/_generate_hansard.py +221 -0
  15. pycorpdiff/datasets/hansard.py +235 -0
  16. pycorpdiff/datasets/histwords.py +221 -0
  17. pycorpdiff/explain.py +177 -0
  18. pycorpdiff/io/__init__.py +16 -0
  19. pycorpdiff/io/duckdb.py +92 -0
  20. pycorpdiff/io/huggingface.py +142 -0
  21. pycorpdiff/io/readers.py +138 -0
  22. pycorpdiff/keyness/__init__.py +26 -0
  23. pycorpdiff/keyness/bayes.py +50 -0
  24. pycorpdiff/keyness/chi_squared.py +94 -0
  25. pycorpdiff/keyness/correction.py +34 -0
  26. pycorpdiff/keyness/dispersion.py +89 -0
  27. pycorpdiff/keyness/effect_sizes.py +65 -0
  28. pycorpdiff/keyness/loglikelihood.py +92 -0
  29. pycorpdiff/keyness/multicorpus.py +143 -0
  30. pycorpdiff/keyness/permutation.py +154 -0
  31. pycorpdiff/py.typed +0 -0
  32. pycorpdiff/results.py +635 -0
  33. pycorpdiff/semantic/__init__.py +18 -0
  34. pycorpdiff/semantic/alignment.py +53 -0
  35. pycorpdiff/semantic/embed.py +84 -0
  36. pycorpdiff/semantic/shift.py +224 -0
  37. pycorpdiff/semantic/trajectory.py +166 -0
  38. pycorpdiff/stats.py +69 -0
  39. pycorpdiff/temporal/__init__.py +15 -0
  40. pycorpdiff/temporal/bocpd.py +233 -0
  41. pycorpdiff/temporal/causal_impact.py +293 -0
  42. pycorpdiff/temporal/changepoint.py +92 -0
  43. pycorpdiff/temporal/forecast.py +405 -0
  44. pycorpdiff/temporal/its.py +123 -0
  45. pycorpdiff/temporal/slicing.py +174 -0
  46. pycorpdiff/tokenize.py +110 -0
  47. pycorpdiff/viz/__init__.py +37 -0
  48. pycorpdiff/viz/bocpd.py +173 -0
  49. pycorpdiff/viz/causal_impact.py +142 -0
  50. pycorpdiff/viz/collocation.py +48 -0
  51. pycorpdiff/viz/dispersion.py +117 -0
  52. pycorpdiff/viz/forecast.py +129 -0
  53. pycorpdiff/viz/keyness.py +96 -0
  54. pycorpdiff/viz/network.py +186 -0
  55. pycorpdiff/viz/scattertext.py +160 -0
  56. pycorpdiff/viz/semantic_forecast.py +114 -0
  57. pycorpdiff/viz/trajectory.py +48 -0
  58. pycorpdiff-0.1.0a0.dist-info/METADATA +230 -0
  59. pycorpdiff-0.1.0a0.dist-info/RECORD +61 -0
  60. pycorpdiff-0.1.0a0.dist-info/WHEEL +4 -0
  61. pycorpdiff-0.1.0a0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,48 @@
1
+ """Temporal trajectory plot — line + Wilson CI band."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ import pandas as pd
8
+
9
+ if TYPE_CHECKING:
10
+ import altair as alt
11
+
12
+
13
def trajectory_with_ci(
    df: pd.DataFrame,
    width: int = 600,
    height: int = 300,
) -> alt.Chart:
    """Time series of relative frequencies with a Wilson CI band.

    Expects the columns produced by :meth:`Tracker.over_time`:
    ``period``, ``term``, ``relfreq``, ``ci_lower``, ``ci_upper``.
    The tooltip additionally references ``count`` and ``total``.
    Terms are distinguished by a nominal colour encoding on ``term``.

    The ``period`` column may contain :class:`pandas.Period` values —
    converted to timestamps internally so altair gets a temporal axis.
    The input frame is copied first and is never mutated. An empty frame
    skips the conversion instead of failing on the first-row probe.

    Parameters
    ----------
    df:
        Long-format frame with one row per (period, term).
    width, height:
        Chart size, forwarded to ``.properties(...)``.

    Returns
    -------
    alt.Chart
        Layered chart: CI area band, line, and filled point markers.
    """
    # Imported lazily so the module can be imported without altair installed.
    import altair as alt

    plot_df = df.copy()
    period_col = plot_df["period"]
    if isinstance(period_col.dtype, pd.PeriodDtype):
        # Dtype-based check works for empty columns and for columns whose
        # first value is NaT, where probing ``iloc[0]`` would fail or miss.
        plot_df["period"] = period_col.dt.to_timestamp()
    elif not period_col.empty and isinstance(period_col.iloc[0], pd.Period):
        # Object-dtype column holding Period objects: convert element-wise.
        plot_df["period"] = period_col.apply(lambda p: p.to_timestamp())

    # Shared x / colour encodings; each layer below adds its own y channel.
    base = alt.Chart(plot_df).encode(
        x=alt.X("period:T", title=None),
        color=alt.Color("term:N", title=None),
    )
    # The band carries the y-axis title for the whole layered chart.
    band = base.mark_area(opacity=0.2).encode(
        y=alt.Y("ci_lower:Q", title="Relative frequency"),
        y2="ci_upper:Q",
    )
    line = base.mark_line(strokeWidth=2).encode(
        y="relfreq:Q",
        tooltip=["period", "term", "count", "total", "relfreq", "ci_lower", "ci_upper"],
    )
    points = base.mark_point(filled=True, size=50).encode(
        y="relfreq:Q",
    )
    return (band + line + points).properties(width=width, height=height)  # type: ignore[no-any-return]
@@ -0,0 +1,230 @@
1
+ Metadata-Version: 2.4
2
+ Name: pycorpdiff
3
+ Version: 0.1.0a0
4
+ Summary: Comparative corpus analysis for Python: keyness, collocations, semantic shift, temporal trajectories with changepoints + causal inference.
5
+ Project-URL: Homepage, https://github.com/jturner-uofl/pycorpdiff
6
+ Project-URL: Documentation, https://github.com/jturner-uofl/pycorpdiff
7
+ Project-URL: Repository, https://github.com/jturner-uofl/pycorpdiff
8
+ Project-URL: Issues, https://github.com/jturner-uofl/pycorpdiff/issues
9
+ Author-email: Jason Turner <jason.s.turner@gmail.com>
10
+ License: MIT License
11
+
12
+ Copyright (c) 2026 Jason Turner
13
+
14
+ Permission is hereby granted, free of charge, to any person obtaining a copy
15
+ of this software and associated documentation files (the "Software"), to deal
16
+ in the Software without restriction, including without limitation the rights
17
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18
+ copies of the Software, and to permit persons to whom the Software is
19
+ furnished to do so, subject to the following conditions:
20
+
21
+ The above copyright notice and this permission notice shall be included in all
22
+ copies or substantial portions of the Software.
23
+
24
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30
+ SOFTWARE.
31
+ License-File: LICENSE
32
+ Keywords: collocation,comparative corpus analysis,computational social science,corpus linguistics,diachronic nlp,digital humanities,discourse analysis,keyness,semantic change,temporal text analysis
33
+ Classifier: Development Status :: 2 - Pre-Alpha
34
+ Classifier: Intended Audience :: Science/Research
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Programming Language :: Python :: 3
37
+ Classifier: Programming Language :: Python :: 3 :: Only
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Programming Language :: Python :: 3.13
41
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
42
+ Classifier: Topic :: Text Processing :: Linguistic
43
+ Requires-Python: >=3.11
44
+ Requires-Dist: numpy>=1.24
45
+ Requires-Dist: pandas<3,>=2.0
46
+ Requires-Dist: pyarrow>=14
47
+ Requires-Dist: scipy>=1.11
48
+ Provides-Extra: all
49
+ Requires-Dist: altair>=5; extra == 'all'
50
+ Requires-Dist: datasets>=2.14; extra == 'all'
51
+ Requires-Dist: duckdb>=0.10; extra == 'all'
52
+ Requires-Dist: matplotlib>=3.8; extra == 'all'
53
+ Requires-Dist: networkx>=3.1; extra == 'all'
54
+ Requires-Dist: polars>=1.0; extra == 'all'
55
+ Requires-Dist: pyarrow>=15; extra == 'all'
56
+ Requires-Dist: pysofra>=0.1.0a2; extra == 'all'
57
+ Requires-Dist: ruptures>=1.1; extra == 'all'
58
+ Requires-Dist: scikit-learn>=1.3; extra == 'all'
59
+ Requires-Dist: sentence-transformers>=2.2; extra == 'all'
60
+ Requires-Dist: spacy>=3.7; extra == 'all'
61
+ Requires-Dist: statsmodels>=0.14; extra == 'all'
62
+ Requires-Dist: vl-convert-python>=1.5; extra == 'all'
63
+ Provides-Extra: dev
64
+ Requires-Dist: hypothesis>=6.100; extra == 'dev'
65
+ Requires-Dist: mypy>=1.8; extra == 'dev'
66
+ Requires-Dist: pandas-stubs>=2.2; extra == 'dev'
67
+ Requires-Dist: pre-commit>=3.6; extra == 'dev'
68
+ Requires-Dist: pytest-cov>=4.1; extra == 'dev'
69
+ Requires-Dist: pytest>=8; extra == 'dev'
70
+ Requires-Dist: ruff>=0.4; extra == 'dev'
71
+ Provides-Extra: duckdb
72
+ Requires-Dist: duckdb>=0.10; extra == 'duckdb'
73
+ Provides-Extra: huggingface
74
+ Requires-Dist: datasets>=2.14; extra == 'huggingface'
75
+ Provides-Extra: nlp
76
+ Requires-Dist: spacy>=3.7; extra == 'nlp'
77
+ Provides-Extra: notebooks
78
+ Requires-Dist: jupyter>=1.0; extra == 'notebooks'
79
+ Requires-Dist: pysofra>=0.1.0a2; extra == 'notebooks'
80
+ Requires-Dist: vl-convert-python>=1.5; extra == 'notebooks'
81
+ Provides-Extra: polars
82
+ Requires-Dist: polars>=1.0; extra == 'polars'
83
+ Requires-Dist: pyarrow>=15; extra == 'polars'
84
+ Provides-Extra: semantic
85
+ Requires-Dist: scikit-learn>=1.3; extra == 'semantic'
86
+ Requires-Dist: sentence-transformers>=2.2; extra == 'semantic'
87
+ Provides-Extra: temporal
88
+ Requires-Dist: ruptures>=1.1; extra == 'temporal'
89
+ Requires-Dist: statsmodels>=0.14; extra == 'temporal'
90
+ Provides-Extra: viz
91
+ Requires-Dist: altair>=5; extra == 'viz'
92
+ Requires-Dist: matplotlib>=3.8; extra == 'viz'
93
+ Requires-Dist: networkx>=3.1; extra == 'viz'
94
+ Description-Content-Type: text/markdown
95
+
96
+ # pycorpdiff
97
+
98
+ <!--
99
+ TODO post-publish (Phase 5 — once GitHub repo public + PyPI published + Zenodo DOI minted):
100
+
101
+ [![PyPI](https://img.shields.io/pypi/v/pycorpdiff.svg)](https://pypi.org/project/pycorpdiff/)
102
+ [![Python versions](https://img.shields.io/pypi/pyversions/pycorpdiff.svg)](https://pypi.org/project/pycorpdiff/)
103
+ [![CI](https://github.com/jturner-uofl/pycorpdiff/actions/workflows/ci.yml/badge.svg)](https://github.com/jturner-uofl/pycorpdiff/actions/workflows/ci.yml)
104
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.<RECORD>.svg)](https://doi.org/10.5281/zenodo.<RECORD>)
105
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
106
+ -->
107
+
108
+ **Comparative corpus analysis for modern Python workflows.**
109
+
110
+ `pycorpdiff` is the **missing comparative layer** between R's
111
+ [`quanteda`](https://quanteda.io/), the closed-source Sketch Engine
112
+ platform, and the fragmented Python NLP stack
113
+ (`nltk`/`spaCy`/`gensim`/`sentence-transformers`). Three public verbs
114
+ — `compare(a, b)`, `track(c, term)`, `compare.before_after(c, event)` —
115
+ consolidate keyness, collocations, dispersion, temporal trajectories,
116
+ changepoint detection, interrupted time series, causal-impact analysis,
117
+ forecasting, online changepoint detection, and embedding-based semantic
118
+ shift under a single notebook-native API. Every result carries its own
119
+ KWIC evidence: `.explain(term)` returns the source-text concordances
120
+ behind any ranked term.
121
+
122
+ The package answers questions that corpus linguists, digital humanists,
123
+ and computational social scientists routinely ask:
124
+
125
+ - *How does corpus A differ from corpus B?* — `compare(a, b).keyness()`
126
+ - *How has discourse around X evolved over time?* — `track(c, "x").over_time()`
127
+ - *What did "migrant" mean in 2005 vs 2023?* — `compare(...).semantic_shift("migrant", embedder=...)`
128
+ - *Did this event actually shift the conversation?* — `track(...).causal_impact(event_date=...)`
129
+ - *Where is the discourse heading?* — `track(...).forecast(horizon=4)`
130
+
131
+ `pycorpdiff` is positioned as **orchestration**, not reinvention.
132
+ Tokenizers (`spaCy`, `Stanza`, `jieba`, `fugashi`) and embedders (any
133
+ `SBERT`-compatible model) plug in via two `typing.Protocol` extension
134
+ points — one-line adapters, no plugin registry. The base install pulls
135
+ only `numpy`, `pandas`, `scipy`, and `pyarrow`; everything else is opt-in
136
+ via extras.
137
+
138
+ > **Status: pre-release alpha (0.1.0a0).** Public API is stable for the
139
+ > features described below; PyPI publication is the next milestone.
140
+
141
+ ## The three-layer architecture
142
+
143
+ | Layer | Purpose | Key surface |
144
+ |---|---|---|
145
+ | **1 — Ingestion + `Corpus`** | get text in, slice it, hash it | `from_dataframe`, `read_csv`, `read_parquet`, `read_txt`, `read_duckdb`, `from_huggingface`, `fetch_hansard`, `Corpus.slice/by_time/__hash__/doc_term_counts(_sparse)/to_polars` |
146
+ | **2 — Pure math** | statistics with no I/O | `keyness.{log_likelihood,chi_squared,log_ratio,percent_diff,bayes_factor,permutation_pvalues,keyness_multi,juilland_d,benjamini_hochberg}`; `collocation.{logdice,pmi,t_score,mi_three,collocation_shift,cooccurrence_network}`; `semantic.{HashEmbedder,SBERTEmbedder,semantic_trajectory,neighborhood_drift}`; `temporal.{changepoints,interrupted_time_series,forecast,causal_impact,bocpd}` |
147
+ | **3 — Verbs + Results** | public API | `compare`, `track`, `compare.before_after`, `keyness_multi`, plus 9 frozen-dataclass Result types each with `.to_df() / .plot() / .explain() / .summary() / .to_html() / .to_json()` |
148
+
149
+ ## Quick start
150
+
151
+ ```python
152
+ import pycorpdiff as pcd
153
+
154
+ news = pcd.from_dataframe(df, text_col="body", meta_cols=("outlet", "date"))
155
+
156
+ # Compare two corpora — keyness, collocation shift, semantic shift
157
+ k = pcd.compare(news.slice(outlet="Guardian"), news.slice(outlet="Mail")).keyness()
158
+ c = pcd.compare(a, b).collocation_shift("migrant")
159
+ s = pcd.compare(a, b).semantic_shift("migrant", embedder=pcd.SBERTEmbedder())
160
+
161
+ # Track over time
162
+ tr = pcd.track(news, "migrant").over_time(freq="Y")
163
+ tr.changepoints() # offline PELT
164
+ tr.changepoints_online(hazard=1/24) # Bayesian online (Adams & MacKay 2007)
165
+ tr.interrupted_time_series(event_date="2016-06-23") # segmented OLS
166
+ tr.causal_impact(event_date="2016-06-23") # Bayesian counterfactual (Brodersen 2015)
167
+ tr.forecast(horizon=4) # state-space ETS
168
+
169
+ # Before / after a known event
170
+ pcd.compare.before_after(news, event_date="2016-06-23").keyness()
171
+
172
+ # N-way (≥ 2 corpora)
173
+ pcd.keyness_multi([gu, ma, te, mi], labels=["Guardian", "Mail", "Telegraph", "Mirror"])
174
+
175
+ # The discourse as a graph
176
+ pcd.cooccurrence_network(news, top_n=50).plot()
177
+
178
+ # Every Result: .to_df() · .plot() · .explain() · .summary() · .to_html() · .to_json()
179
+ ```
180
+
181
+ See [`examples/pycorpdiff_showcase.ipynb`](examples/pycorpdiff_showcase.ipynb)
182
+ ([rendered HTML](docs/rendered/pycorpdiff_showcase.html)) for a
183
+ walkthrough on a synthetic UK Hansard corpus exercising every analytical
184
+ surface.
185
+
186
+ ## Installation
187
+
188
+ <!-- TODO post-publish: replace this block with the PyPI install commands once published. -->
189
+
190
+ Currently a pre-release alpha. From a local clone:
191
+
192
+ ```bash
193
+ git clone https://github.com/jturner-uofl/pycorpdiff
194
+ cd pycorpdiff
195
+ pip install -e ".[dev]"
196
+ pytest -q # 519 default tests, ~7s
197
+ ```
198
+
199
+ Optional extras: `[viz]` (altair + matplotlib + networkx), `[semantic]`
200
+ (sentence-transformers + scikit-learn), `[temporal]` (ruptures +
201
+ statsmodels), `[polars]`, `[duckdb]`, `[huggingface]`, `[nlp]` (spaCy),
202
+ `[notebooks]` (jupyter + vl-convert + pysofra, for the showcase),
203
+ or `[all]`.
204
+
205
+ ## Cross-validation receipts
206
+
207
+ The math agrees with the standard tools — by automated test:
208
+
209
+ - **Rayson's LL Wizard** — 15 hand-derived contingency-table reference triples
210
+ - **NLTK** `BigramAssocMeasures` — PMI + t-score to ≤ 1e-12 on every adjacent bigram
211
+ - **Scattertext (Kessler 2017)** — behavioural agreement on the 2012 US Conventions corpus
212
+ - **quanteda (R)** via `rpy2` — byte-for-byte G² agreement (slow tier)
213
+ - **HistWords (Hamilton et al. 2016)** — diachronic cosine displacements on COHA (slow tier)
214
+
215
+ ## Citation
216
+
217
+ If you use `pycorpdiff` in academic work, please cite the software via
218
+ the `CITATION.cff` file in this repository — GitHub renders a "Cite this
219
+ repository" widget directly from it.
220
+
221
+ ## License
222
+
223
+ MIT — see [LICENSE](LICENSE).
224
+
225
+ ## Further reading
226
+
227
+ - [`docs/design.md`](docs/design.md) — three-layer architecture
228
+ - [`docs/statistical-methods.md`](docs/statistical-methods.md) — every metric's formula + citation
229
+ - [`examples/pycorpdiff_showcase.ipynb`](examples/pycorpdiff_showcase.ipynb) — full feature tour as a notebook
230
+ - [`docs/rendered/`](docs/rendered/) — self-contained HTML renders of the example notebooks
@@ -0,0 +1,61 @@
1
+ pycorpdiff/__init__.py,sha256=p_NMp7wO5xL3LSTli6ULbSEUHfc1i_cqLDhnjgutHZw,3582
2
+ pycorpdiff/compare.py,sha256=YcDXucRF9xXHFYijGOXe7erlFZR6U4Rc-NlowZfA0AE,13487
3
+ pycorpdiff/corpus.py,sha256=fX8-C6_A0VEHq3HaSOBfEXvKAcaRK5SBGaEL1_gbKWA,16090
4
+ pycorpdiff/explain.py,sha256=UVDwf3GpGr2uW1XWaB7fLjd9igbDgCTmFpLRyG3ZMZ0,5808
5
+ pycorpdiff/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ pycorpdiff/results.py,sha256=0w5ktYuD7bA309-mxfDjM7Sfs8O3nqK22jaN6nx-iRs,22879
7
+ pycorpdiff/stats.py,sha256=rvxNC95lF8ZyEufAH1zq8_Kv6ZA61bIeU2ek-GoqGD4,2501
8
+ pycorpdiff/tokenize.py,sha256=bt1fyUyLdN9J1208FMfWDz-YwOMTVFVTSmP5GEWYJQE,3852
9
+ pycorpdiff/_backends/__init__.py,sha256=wrtXgtwznp_kfa_gy9h4QqrdHQlUzxmu220Maf4x05U,118
10
+ pycorpdiff/_backends/pandas.py,sha256=HVGv8u7tFl2wN51BiVoTNUp4KC5qA7xmPFpl2jQVCHM,117
11
+ pycorpdiff/_backends/polars.py,sha256=xQD3NP-BmeIRpxn-7rBC-eqnHjgJvGrsJePENOzw9sA,111
12
+ pycorpdiff/collocation/__init__.py,sha256=RneM6uLUCDQGCHk8P49MEeEP3AmivJuDb8MjvC0LBzE,457
13
+ pycorpdiff/collocation/cooccurrence.py,sha256=wIUkfjDNYruw5il7MYn6SQyzPM1WHN-xACXzSHF7cd0,2146
14
+ pycorpdiff/collocation/measures.py,sha256=2Ee7xcJbWZmYPUCOByCW_m_eleyWaWO_3BlX8DxsgfQ,3115
15
+ pycorpdiff/collocation/network.py,sha256=mSAEg4XJTL-ryMfxeSg4DDzNNxhC1fOvQEFmHwZAJ7c,7999
16
+ pycorpdiff/collocation/shift.py,sha256=3QdGX8TncpYB0EsvMXvcAovacv6ctc9uF-ASPrq-Q8k,4834
17
+ pycorpdiff/datasets/__init__.py,sha256=F2e4SoZNTM7eKcZxVs8b-7lg7T6YNm7O0uaRa-ufb1c,979
18
+ pycorpdiff/datasets/_generate_hansard.py,sha256=Eqs4pZHIaxz52TK62500KxsyPUAvFLkFOkIeMPfnSd4,10717
19
+ pycorpdiff/datasets/hansard.py,sha256=Xf49UKfMhWmw19-8bMCLy-NGzZ0p4qfKWzZVXS7Gk4Y,8523
20
+ pycorpdiff/datasets/histwords.py,sha256=IdMCuIFLq63gqCBIR1fFSvnUFSeXh6iNreK6zuFOYsA,8939
21
+ pycorpdiff/datasets/_data/hansard_sample.parquet,sha256=F19tKAmIEPdCT9noBJdlC5Nc-6YbNBC9aksEWx1Jcvo,14061
22
+ pycorpdiff/io/__init__.py,sha256=9DNFyjnZhZW9J5T0MUhKJ0aeWcRWpZMGF3GTGjI6FiE,384
23
+ pycorpdiff/io/duckdb.py,sha256=4vHtLawKn748JTEM1JK0VsUYVF2GrxVYeukruqqRyHc,3204
24
+ pycorpdiff/io/huggingface.py,sha256=UcgpENNvFeffiJjWw1oc9hl4WpUFDaZ7mFmaPmJQHzg,4716
25
+ pycorpdiff/io/readers.py,sha256=QmGbNCS9uytm0L_EwV0LfAOrid07UtmnOig2zj7c2Ho,4600
26
+ pycorpdiff/keyness/__init__.py,sha256=RMTcKE7kEn9CZzuOwmhKMu8uEvGlTb_iZF-p8o9Lz-Q,706
27
+ pycorpdiff/keyness/bayes.py,sha256=dSjcrLjtQkXcP32RvQPMtDDBBGB7nbHI0VpqXwJxELQ,1591
28
+ pycorpdiff/keyness/chi_squared.py,sha256=RCuul3_YK_QMkXe07dCUILikHHNtK6GzPHxfXv3SjKk,3355
29
+ pycorpdiff/keyness/correction.py,sha256=QAgyGhBcayU0rOOXZ2Pvgo5JC_dtYALWmFt4Qv8ZL5E,1183
30
+ pycorpdiff/keyness/dispersion.py,sha256=XymmPq-Ee8TM4Iek5bXES58dAJmID9U4yz-FxcYkF-Q,3881
31
+ pycorpdiff/keyness/effect_sizes.py,sha256=jkBzAhtbph6xgq2r_MtM7JD2Bevx4ZKhk_koRmy7wZU,2279
32
+ pycorpdiff/keyness/loglikelihood.py,sha256=8Kr5aJM9ButjLVwBoljHev-6wIQYuK_yo-IPlrqKd4w,3042
33
+ pycorpdiff/keyness/multicorpus.py,sha256=oBWzkHDL4RcKUABXu3YEUAqmPOftE0gSxvLDlzdGxDg,5120
34
+ pycorpdiff/keyness/permutation.py,sha256=yysrBa3vbrigEtWgvGg7EK4QDZLZsrdDAmxYe-K0aFw,5773
35
+ pycorpdiff/semantic/__init__.py,sha256=HUEn_o2q1iNtP8Ebn9Wc-HAiNALLqTODKsFuyet_5g4,465
36
+ pycorpdiff/semantic/alignment.py,sha256=k4wBmUWTXTc3laSzJPD997m37dVoQwA5aJIeTHsAaVI,2069
37
+ pycorpdiff/semantic/embed.py,sha256=d5mr_90TvKU3_di6PEJsnSQHeCyRWvS-RnsAvEsa0tE,3413
38
+ pycorpdiff/semantic/shift.py,sha256=fQArBeUxHy6lYDnsMif7w3L5SqCEC9n4hBFEFh1mu1M,7879
39
+ pycorpdiff/semantic/trajectory.py,sha256=AWVvTuFZGdWnSWag4Em_c7Nl026T-G1AKTtEfntfHyM,6101
40
+ pycorpdiff/temporal/__init__.py,sha256=U474af23-oq2tliJZoVVKzyaQevshxU9rjiPrKGHyT0,370
41
+ pycorpdiff/temporal/bocpd.py,sha256=pah0amEi7yADSotCGYoy2Bzo_Dyc_SA3SHndIQ_pQig,8374
42
+ pycorpdiff/temporal/causal_impact.py,sha256=29WNH4f33cXA-XdnuUH3gzPBsNLSkveyRECQ4Coa6HU,11105
43
+ pycorpdiff/temporal/changepoint.py,sha256=ZoyryX5sOlfbnEPawV1Q6XPLvMMSJ4EqFc30quLQ5LY,3159
44
+ pycorpdiff/temporal/forecast.py,sha256=NQih5VDs7p4lwL_Yy1kaFHZUE43r7pEd5Ibbyt1k4zo,13801
45
+ pycorpdiff/temporal/its.py,sha256=t_YAyPe_XotR96e4W7ODKIDCZ80vDyXUf2n0o44DwFA,4396
46
+ pycorpdiff/temporal/slicing.py,sha256=njsKN1DuSdbBKE5DYypUa2F_UMxPVgRpKtU-IJeaDlE,6372
47
+ pycorpdiff/viz/__init__.py,sha256=qy4Cxad3gkiULnkbohN0ecIfMEWiZ3ysnkedTiBxbqw,1197
48
+ pycorpdiff/viz/bocpd.py,sha256=vlIIPOMmapNORUTGTkMLmetziJzZIalI_-E4ZPi5GKs,5804
49
+ pycorpdiff/viz/causal_impact.py,sha256=H3AVIO4Mv6sioflH8iSPOi5VUN4DDrThr9vnYlroj8Y,4909
50
+ pycorpdiff/viz/collocation.py,sha256=g7Lep6LrqrHkUNAHSjXd1WTUWzdQtaQdQwkdD3NpZRc,1365
51
+ pycorpdiff/viz/dispersion.py,sha256=8pPhJNuu_cTFkTK0Z9knGCvAqZ-KKPlHDGWONBjB32E,3846
52
+ pycorpdiff/viz/forecast.py,sha256=Za7G9pOTAAnWiRPld9lsNyTNhUp10CgqM34LlE8iM9E,4090
53
+ pycorpdiff/viz/keyness.py,sha256=wks5zWNcmm-2SFdG1ev6UVS-a8ySkmoAG4LCc48T_oM,3043
54
+ pycorpdiff/viz/network.py,sha256=W_GFsvFAGzA2FFQQXJOsXq7K7QQItSqH-2krWdRKwpw,6117
55
+ pycorpdiff/viz/scattertext.py,sha256=K64AGKO_-XBtbEML0JCCGUn25GUUBeyX9uQOW8qQDio,5375
56
+ pycorpdiff/viz/semantic_forecast.py,sha256=C4IHjNt4BC9pHNWzPDnQVQihI-HyfSjgsqDhx4U8dR4,3706
57
+ pycorpdiff/viz/trajectory.py,sha256=Dlrr-pJGDib7s67HMrhbtgls0dK1vxqfWizUzyNCFXQ,1538
58
+ pycorpdiff-0.1.0a0.dist-info/METADATA,sha256=He0-iGnk-nRDRsR7rd60GRo79nU1bqFlLd0_IXFcvAA,11392
59
+ pycorpdiff-0.1.0a0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
60
+ pycorpdiff-0.1.0a0.dist-info/licenses/LICENSE,sha256=ejByysE4yqPsBN9CLSCSuK85QFzY979kJ7fIR8R_J7U,1069
61
+ pycorpdiff-0.1.0a0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Jason Turner
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.