pycorpdiff 0.1.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycorpdiff/__init__.py +126 -0
- pycorpdiff/_backends/__init__.py +3 -0
- pycorpdiff/_backends/pandas.py +3 -0
- pycorpdiff/_backends/polars.py +3 -0
- pycorpdiff/collocation/__init__.py +19 -0
- pycorpdiff/collocation/cooccurrence.py +65 -0
- pycorpdiff/collocation/measures.py +102 -0
- pycorpdiff/collocation/network.py +233 -0
- pycorpdiff/collocation/shift.py +146 -0
- pycorpdiff/compare.py +345 -0
- pycorpdiff/corpus.py +411 -0
- pycorpdiff/datasets/__init__.py +27 -0
- pycorpdiff/datasets/_data/hansard_sample.parquet +0 -0
- pycorpdiff/datasets/_generate_hansard.py +221 -0
- pycorpdiff/datasets/hansard.py +235 -0
- pycorpdiff/datasets/histwords.py +221 -0
- pycorpdiff/explain.py +177 -0
- pycorpdiff/io/__init__.py +16 -0
- pycorpdiff/io/duckdb.py +92 -0
- pycorpdiff/io/huggingface.py +142 -0
- pycorpdiff/io/readers.py +138 -0
- pycorpdiff/keyness/__init__.py +26 -0
- pycorpdiff/keyness/bayes.py +50 -0
- pycorpdiff/keyness/chi_squared.py +94 -0
- pycorpdiff/keyness/correction.py +34 -0
- pycorpdiff/keyness/dispersion.py +89 -0
- pycorpdiff/keyness/effect_sizes.py +65 -0
- pycorpdiff/keyness/loglikelihood.py +92 -0
- pycorpdiff/keyness/multicorpus.py +143 -0
- pycorpdiff/keyness/permutation.py +154 -0
- pycorpdiff/py.typed +0 -0
- pycorpdiff/results.py +635 -0
- pycorpdiff/semantic/__init__.py +18 -0
- pycorpdiff/semantic/alignment.py +53 -0
- pycorpdiff/semantic/embed.py +84 -0
- pycorpdiff/semantic/shift.py +224 -0
- pycorpdiff/semantic/trajectory.py +166 -0
- pycorpdiff/stats.py +69 -0
- pycorpdiff/temporal/__init__.py +15 -0
- pycorpdiff/temporal/bocpd.py +233 -0
- pycorpdiff/temporal/causal_impact.py +293 -0
- pycorpdiff/temporal/changepoint.py +92 -0
- pycorpdiff/temporal/forecast.py +405 -0
- pycorpdiff/temporal/its.py +123 -0
- pycorpdiff/temporal/slicing.py +174 -0
- pycorpdiff/tokenize.py +110 -0
- pycorpdiff/viz/__init__.py +37 -0
- pycorpdiff/viz/bocpd.py +173 -0
- pycorpdiff/viz/causal_impact.py +142 -0
- pycorpdiff/viz/collocation.py +48 -0
- pycorpdiff/viz/dispersion.py +117 -0
- pycorpdiff/viz/forecast.py +129 -0
- pycorpdiff/viz/keyness.py +96 -0
- pycorpdiff/viz/network.py +186 -0
- pycorpdiff/viz/scattertext.py +160 -0
- pycorpdiff/viz/semantic_forecast.py +114 -0
- pycorpdiff/viz/trajectory.py +48 -0
- pycorpdiff-0.1.0a0.dist-info/METADATA +230 -0
- pycorpdiff-0.1.0a0.dist-info/RECORD +61 -0
- pycorpdiff-0.1.0a0.dist-info/WHEEL +4 -0
- pycorpdiff-0.1.0a0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Temporal trajectory plot — line + Wilson CI band."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
import altair as alt
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def trajectory_with_ci(
    df: pd.DataFrame,
    width: int = 600,
    height: int = 300,
) -> alt.Chart:
    """Time series of relative frequencies with a Wilson CI band.

    Expects the columns produced by :meth:`Tracker.over_time`:
    ``period``, ``term``, ``relfreq``, ``ci_lower``, ``ci_upper``.
    The tooltip encoding additionally references ``count`` and ``total``.
    Multiple terms are layered with the standard altair colour scheme.

    The ``period`` column may contain :class:`pandas.Period` values —
    converted to timestamps internally so altair gets a temporal axis.
    The conversion runs on a copy, so the caller's frame is not modified.
    An empty frame skips the conversion instead of raising ``IndexError``.

    Parameters
    ----------
    df
        Frame holding the columns listed above.
    width, height
        Forwarded to ``.properties(width=..., height=...)`` on the
        layered chart.

    Returns
    -------
    alt.Chart
        The layered chart: CI band, then line, then points.
    """
    # Imported lazily: at module level altair is only pulled in under
    # TYPE_CHECKING for the return annotation.
    import altair as alt

    plot_df = df.copy()
    period = plot_df["period"]
    if isinstance(period.dtype, pd.PeriodDtype):
        # Period-dtype column: vectorised conversion, independent of
        # whether the first value happens to be missing.
        plot_df["period"] = period.dt.to_timestamp()
    elif not period.empty and isinstance(period.iloc[0], pd.Period):
        # Object-dtype column holding Period objects: same first-element
        # probe as before, now guarded against an empty column.
        plot_df["period"] = period.apply(lambda p: p.to_timestamp())

    # Encodings shared by all three layers.
    base = alt.Chart(plot_df).encode(
        x=alt.X("period:T", title=None),
        color=alt.Color("term:N", title=None),
    )
    # Shaded area spanning ci_lower..ci_upper.
    band = base.mark_area(opacity=0.2).encode(
        y=alt.Y("ci_lower:Q", title="Relative frequency"),
        y2="ci_upper:Q",
    )
    line = base.mark_line(strokeWidth=2).encode(
        y="relfreq:Q",
        tooltip=["period", "term", "count", "total", "relfreq", "ci_lower", "ci_upper"],
    )
    points = base.mark_point(filled=True, size=50).encode(
        y="relfreq:Q",
    )
    return (band + line + points).properties(width=width, height=height)  # type: ignore[no-any-return]
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pycorpdiff
|
|
3
|
+
Version: 0.1.0a0
|
|
4
|
+
Summary: Comparative corpus analysis for Python: keyness, collocations, semantic shift, temporal trajectories with changepoints + causal inference.
|
|
5
|
+
Project-URL: Homepage, https://github.com/jturner-uofl/pycorpdiff
|
|
6
|
+
Project-URL: Documentation, https://github.com/jturner-uofl/pycorpdiff
|
|
7
|
+
Project-URL: Repository, https://github.com/jturner-uofl/pycorpdiff
|
|
8
|
+
Project-URL: Issues, https://github.com/jturner-uofl/pycorpdiff/issues
|
|
9
|
+
Author-email: Jason Turner <jason.s.turner@gmail.com>
|
|
10
|
+
License: MIT License
|
|
11
|
+
|
|
12
|
+
Copyright (c) 2026 Jason Turner
|
|
13
|
+
|
|
14
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
15
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
16
|
+
in the Software without restriction, including without limitation the rights
|
|
17
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
18
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
19
|
+
furnished to do so, subject to the following conditions:
|
|
20
|
+
|
|
21
|
+
The above copyright notice and this permission notice shall be included in all
|
|
22
|
+
copies or substantial portions of the Software.
|
|
23
|
+
|
|
24
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
25
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
26
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
27
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
28
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
29
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
30
|
+
SOFTWARE.
|
|
31
|
+
License-File: LICENSE
|
|
32
|
+
Keywords: collocation,comparative corpus analysis,computational social science,corpus linguistics,diachronic nlp,digital humanities,discourse analysis,keyness,semantic change,temporal text analysis
|
|
33
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
34
|
+
Classifier: Intended Audience :: Science/Research
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Programming Language :: Python :: 3
|
|
37
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
41
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
42
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
43
|
+
Requires-Python: >=3.11
|
|
44
|
+
Requires-Dist: numpy>=1.24
|
|
45
|
+
Requires-Dist: pandas<3,>=2.0
|
|
46
|
+
Requires-Dist: pyarrow>=14
|
|
47
|
+
Requires-Dist: scipy>=1.11
|
|
48
|
+
Provides-Extra: all
|
|
49
|
+
Requires-Dist: altair>=5; extra == 'all'
|
|
50
|
+
Requires-Dist: datasets>=2.14; extra == 'all'
|
|
51
|
+
Requires-Dist: duckdb>=0.10; extra == 'all'
|
|
52
|
+
Requires-Dist: matplotlib>=3.8; extra == 'all'
|
|
53
|
+
Requires-Dist: networkx>=3.1; extra == 'all'
|
|
54
|
+
Requires-Dist: polars>=1.0; extra == 'all'
|
|
55
|
+
Requires-Dist: pyarrow>=15; extra == 'all'
|
|
56
|
+
Requires-Dist: pysofra>=0.1.0a2; extra == 'all'
|
|
57
|
+
Requires-Dist: ruptures>=1.1; extra == 'all'
|
|
58
|
+
Requires-Dist: scikit-learn>=1.3; extra == 'all'
|
|
59
|
+
Requires-Dist: sentence-transformers>=2.2; extra == 'all'
|
|
60
|
+
Requires-Dist: spacy>=3.7; extra == 'all'
|
|
61
|
+
Requires-Dist: statsmodels>=0.14; extra == 'all'
|
|
62
|
+
Requires-Dist: vl-convert-python>=1.5; extra == 'all'
|
|
63
|
+
Provides-Extra: dev
|
|
64
|
+
Requires-Dist: hypothesis>=6.100; extra == 'dev'
|
|
65
|
+
Requires-Dist: mypy>=1.8; extra == 'dev'
|
|
66
|
+
Requires-Dist: pandas-stubs>=2.2; extra == 'dev'
|
|
67
|
+
Requires-Dist: pre-commit>=3.6; extra == 'dev'
|
|
68
|
+
Requires-Dist: pytest-cov>=4.1; extra == 'dev'
|
|
69
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
70
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
71
|
+
Provides-Extra: duckdb
|
|
72
|
+
Requires-Dist: duckdb>=0.10; extra == 'duckdb'
|
|
73
|
+
Provides-Extra: huggingface
|
|
74
|
+
Requires-Dist: datasets>=2.14; extra == 'huggingface'
|
|
75
|
+
Provides-Extra: nlp
|
|
76
|
+
Requires-Dist: spacy>=3.7; extra == 'nlp'
|
|
77
|
+
Provides-Extra: notebooks
|
|
78
|
+
Requires-Dist: jupyter>=1.0; extra == 'notebooks'
|
|
79
|
+
Requires-Dist: pysofra>=0.1.0a2; extra == 'notebooks'
|
|
80
|
+
Requires-Dist: vl-convert-python>=1.5; extra == 'notebooks'
|
|
81
|
+
Provides-Extra: polars
|
|
82
|
+
Requires-Dist: polars>=1.0; extra == 'polars'
|
|
83
|
+
Requires-Dist: pyarrow>=15; extra == 'polars'
|
|
84
|
+
Provides-Extra: semantic
|
|
85
|
+
Requires-Dist: scikit-learn>=1.3; extra == 'semantic'
|
|
86
|
+
Requires-Dist: sentence-transformers>=2.2; extra == 'semantic'
|
|
87
|
+
Provides-Extra: temporal
|
|
88
|
+
Requires-Dist: ruptures>=1.1; extra == 'temporal'
|
|
89
|
+
Requires-Dist: statsmodels>=0.14; extra == 'temporal'
|
|
90
|
+
Provides-Extra: viz
|
|
91
|
+
Requires-Dist: altair>=5; extra == 'viz'
|
|
92
|
+
Requires-Dist: matplotlib>=3.8; extra == 'viz'
|
|
93
|
+
Requires-Dist: networkx>=3.1; extra == 'viz'
|
|
94
|
+
Description-Content-Type: text/markdown
|
|
95
|
+
|
|
96
|
+
# pycorpdiff
|
|
97
|
+
|
|
98
|
+
<!--
|
|
99
|
+
TODO post-publish (Phase 5 — once GitHub repo public + PyPI published + Zenodo DOI minted):
|
|
100
|
+
|
|
101
|
+
[](https://pypi.org/project/pycorpdiff/)
|
|
102
|
+
[](https://pypi.org/project/pycorpdiff/)
|
|
103
|
+
[](https://github.com/jturner-uofl/pycorpdiff/actions/workflows/ci.yml)
|
|
104
|
+
[](https://doi.org/10.5281/zenodo.<RECORD>)
|
|
105
|
+
[](https://opensource.org/licenses/MIT)
|
|
106
|
+
-->
|
|
107
|
+
|
|
108
|
+
**Comparative corpus analysis for modern Python workflows.**
|
|
109
|
+
|
|
110
|
+
`pycorpdiff` is the **missing comparative layer** between R's
|
|
111
|
+
[`quanteda`](https://quanteda.io/), the closed-source Sketch Engine
|
|
112
|
+
platform, and the fragmented Python NLP stack
|
|
113
|
+
(`nltk`/`spaCy`/`gensim`/`sentence-transformers`). Three public verbs
|
|
114
|
+
— `compare(a, b)`, `track(c, term)`, `compare.before_after(c, event)` —
|
|
115
|
+
consolidate keyness, collocations, dispersion, temporal trajectories,
|
|
116
|
+
changepoint detection, interrupted time series, causal-impact analysis,
|
|
117
|
+
forecasting, online changepoint detection, and embedding-based semantic
|
|
118
|
+
shift under a single notebook-native API. Every result carries its own
|
|
119
|
+
KWIC evidence: `.explain(term)` returns the source-text concordances
|
|
120
|
+
behind any ranked term.
|
|
121
|
+
|
|
122
|
+
The package answers the questions corpus linguistics, digital humanities,
|
|
123
|
+
and computational social science routinely ask:
|
|
124
|
+
|
|
125
|
+
- *How does corpus A differ from corpus B?* — `compare(a, b).keyness()`
|
|
126
|
+
- *How has discourse around X evolved over time?* — `track(c, "x").over_time()`
|
|
127
|
+
- *What did "migrant" mean in 2005 vs 2023?* — `compare(...).semantic_shift("migrant", embedder=...)`
|
|
128
|
+
- *Did this event actually shift the conversation?* — `track(...).causal_impact(event_date=...)`
|
|
129
|
+
- *Where is the discourse heading?* — `track(...).forecast(horizon=4)`
|
|
130
|
+
|
|
131
|
+
`pycorpdiff` is positioned as **orchestration**, not reinvention.
|
|
132
|
+
Tokenizers (`spaCy`, `Stanza`, `jieba`, `fugashi`) and embedders (any
|
|
133
|
+
`SBERT`-compatible model) plug in via two `typing.Protocol` extension
|
|
134
|
+
points — one-line adapters, no plugin registry. The base install pulls
|
|
135
|
+
only `numpy`, `pandas`, `scipy`, and `pyarrow`; everything else is opt-in
|
|
136
|
+
via extras.
|
|
137
|
+
|
|
138
|
+
> **Status: pre-release alpha (0.1.0a0).** Public API is stable for the
|
|
139
|
+
> features described below; PyPI publication is the next milestone.
|
|
140
|
+
|
|
141
|
+
## The three-layer architecture
|
|
142
|
+
|
|
143
|
+
| Layer | Purpose | Key surface |
|
|
144
|
+
|---|---|---|
|
|
145
|
+
| **1 — Ingestion + `Corpus`** | get text in, slice it, hash it | `from_dataframe`, `read_csv`, `read_parquet`, `read_txt`, `read_duckdb`, `from_huggingface`, `fetch_hansard`, `Corpus.slice/by_time/__hash__/doc_term_counts(_sparse)/to_polars` |
|
|
146
|
+
| **2 — Pure math** | statistics with no I/O | `keyness.{log_likelihood,chi_squared,log_ratio,percent_diff,bayes_factor,permutation_pvalues,keyness_multi,juilland_d,benjamini_hochberg}`; `collocation.{logdice,pmi,t_score,mi_three,collocation_shift,cooccurrence_network}`; `semantic.{HashEmbedder,SBERTEmbedder,semantic_trajectory,neighborhood_drift}`; `temporal.{changepoints,interrupted_time_series,forecast,causal_impact,bocpd}` |
|
|
147
|
+
| **3 — Verbs + Results** | public API | `compare`, `track`, `compare.before_after`, `keyness_multi`, plus 9 frozen-dataclass Result types each with `.to_df() / .plot() / .explain() / .summary() / .to_html() / .to_json()` |
|
|
148
|
+
|
|
149
|
+
## Quick start
|
|
150
|
+
|
|
151
|
+
```python
|
|
152
|
+
import pycorpdiff as pcd
|
|
153
|
+
|
|
154
|
+
news = pcd.from_dataframe(df, text_col="body", meta_cols=("outlet", "date"))
|
|
155
|
+
|
|
156
|
+
# Compare — three analyses on a pair of corpora
|
|
157
|
+
k = pcd.compare(news.slice(outlet="Guardian"), news.slice(outlet="Mail")).keyness()
|
|
158
|
+
c = pcd.compare(a, b).collocation_shift("migrant")
|
|
159
|
+
s = pcd.compare(a, b).semantic_shift("migrant", embedder=pcd.SBERTEmbedder())
|
|
160
|
+
|
|
161
|
+
# Track over time
|
|
162
|
+
tr = pcd.track(news, "migrant").over_time(freq="Y")
|
|
163
|
+
tr.changepoints() # offline PELT
|
|
164
|
+
tr.changepoints_online(hazard=1/24) # Bayesian online (Adams & MacKay 2007)
|
|
165
|
+
tr.interrupted_time_series(event_date="2016-06-23") # segmented OLS
|
|
166
|
+
tr.causal_impact(event_date="2016-06-23") # Bayesian counterfactual (Brodersen 2015)
|
|
167
|
+
tr.forecast(horizon=4) # state-space ETS
|
|
168
|
+
|
|
169
|
+
# Before / after a known event
|
|
170
|
+
pcd.compare.before_after(news, event_date="2016-06-23").keyness()
|
|
171
|
+
|
|
172
|
+
# N-way (≥ 2 corpora)
|
|
173
|
+
pcd.keyness_multi([gu, ma, te, mi], labels=["Guardian", "Mail", "Telegraph", "Mirror"])
|
|
174
|
+
|
|
175
|
+
# The discourse as a graph
|
|
176
|
+
pcd.cooccurrence_network(news, top_n=50).plot()
|
|
177
|
+
|
|
178
|
+
# Every Result: .to_df() · .plot() · .explain() · .summary() · .to_html() · .to_json()
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
See [`examples/pycorpdiff_showcase.ipynb`](examples/pycorpdiff_showcase.ipynb)
|
|
182
|
+
([rendered HTML](docs/rendered/pycorpdiff_showcase.html)) for a
|
|
183
|
+
walkthrough on a synthetic UK Hansard corpus exercising every analytical
|
|
184
|
+
surface.
|
|
185
|
+
|
|
186
|
+
## Installation
|
|
187
|
+
|
|
188
|
+
<!-- TODO post-publish: replace this block with the PyPI install commands once published. -->
|
|
189
|
+
|
|
190
|
+
`pycorpdiff` is currently a pre-release alpha. To install it from a local clone:
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
git clone https://github.com/jturner-uofl/pycorpdiff
|
|
194
|
+
cd pycorpdiff
|
|
195
|
+
pip install -e ".[dev]"
|
|
196
|
+
pytest -q # 519 default tests, ~7s
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
Optional extras: `[viz]` (altair + matplotlib + networkx), `[semantic]`
|
|
200
|
+
(sentence-transformers + scikit-learn), `[temporal]` (ruptures +
|
|
201
|
+
statsmodels), `[polars]`, `[duckdb]`, `[huggingface]`, `[nlp]` (spaCy),
|
|
202
|
+
`[notebooks]` (jupyter + vl-convert + pysofra, for the showcase),
|
|
203
|
+
or `[all]`.
|
|
204
|
+
|
|
205
|
+
## Cross-validation receipts
|
|
206
|
+
|
|
207
|
+
The math agrees with the standard tools — by automated test:
|
|
208
|
+
|
|
209
|
+
- **Rayson's LL Wizard** — 15 hand-derived contingency-table reference triples
|
|
210
|
+
- **NLTK** `BigramAssocMeasures` — PMI + t-score to ≤ 1e-12 on every adjacent bigram
|
|
211
|
+
- **Scattertext (Kessler 2017)** — behavioural agreement on the 2012 US Conventions corpus
|
|
212
|
+
- **quanteda (R)** via `rpy2` — byte-for-byte G² agreement (slow tier)
|
|
213
|
+
- **HistWords (Hamilton et al. 2016)** — diachronic cosine displacements on COHA (slow tier)
|
|
214
|
+
|
|
215
|
+
## Citation
|
|
216
|
+
|
|
217
|
+
If you use `pycorpdiff` in academic work, please cite the software via
|
|
218
|
+
the `CITATION.cff` file in this repository — GitHub renders a "Cite this
|
|
219
|
+
repository" widget directly from it.
|
|
220
|
+
|
|
221
|
+
## License
|
|
222
|
+
|
|
223
|
+
MIT — see [LICENSE](LICENSE).
|
|
224
|
+
|
|
225
|
+
## Further reading
|
|
226
|
+
|
|
227
|
+
- [`docs/design.md`](docs/design.md) — three-layer architecture
|
|
228
|
+
- [`docs/statistical-methods.md`](docs/statistical-methods.md) — every metric's formula + citation
|
|
229
|
+
- [`examples/pycorpdiff_showcase.ipynb`](examples/pycorpdiff_showcase.ipynb) — full feature tour as a notebook
|
|
230
|
+
- [`docs/rendered/`](docs/rendered/) — self-contained HTML renders of the example notebooks
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
pycorpdiff/__init__.py,sha256=p_NMp7wO5xL3LSTli6ULbSEUHfc1i_cqLDhnjgutHZw,3582
|
|
2
|
+
pycorpdiff/compare.py,sha256=YcDXucRF9xXHFYijGOXe7erlFZR6U4Rc-NlowZfA0AE,13487
|
|
3
|
+
pycorpdiff/corpus.py,sha256=fX8-C6_A0VEHq3HaSOBfEXvKAcaRK5SBGaEL1_gbKWA,16090
|
|
4
|
+
pycorpdiff/explain.py,sha256=UVDwf3GpGr2uW1XWaB7fLjd9igbDgCTmFpLRyG3ZMZ0,5808
|
|
5
|
+
pycorpdiff/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
pycorpdiff/results.py,sha256=0w5ktYuD7bA309-mxfDjM7Sfs8O3nqK22jaN6nx-iRs,22879
|
|
7
|
+
pycorpdiff/stats.py,sha256=rvxNC95lF8ZyEufAH1zq8_Kv6ZA61bIeU2ek-GoqGD4,2501
|
|
8
|
+
pycorpdiff/tokenize.py,sha256=bt1fyUyLdN9J1208FMfWDz-YwOMTVFVTSmP5GEWYJQE,3852
|
|
9
|
+
pycorpdiff/_backends/__init__.py,sha256=wrtXgtwznp_kfa_gy9h4QqrdHQlUzxmu220Maf4x05U,118
|
|
10
|
+
pycorpdiff/_backends/pandas.py,sha256=HVGv8u7tFl2wN51BiVoTNUp4KC5qA7xmPFpl2jQVCHM,117
|
|
11
|
+
pycorpdiff/_backends/polars.py,sha256=xQD3NP-BmeIRpxn-7rBC-eqnHjgJvGrsJePENOzw9sA,111
|
|
12
|
+
pycorpdiff/collocation/__init__.py,sha256=RneM6uLUCDQGCHk8P49MEeEP3AmivJuDb8MjvC0LBzE,457
|
|
13
|
+
pycorpdiff/collocation/cooccurrence.py,sha256=wIUkfjDNYruw5il7MYn6SQyzPM1WHN-xACXzSHF7cd0,2146
|
|
14
|
+
pycorpdiff/collocation/measures.py,sha256=2Ee7xcJbWZmYPUCOByCW_m_eleyWaWO_3BlX8DxsgfQ,3115
|
|
15
|
+
pycorpdiff/collocation/network.py,sha256=mSAEg4XJTL-ryMfxeSg4DDzNNxhC1fOvQEFmHwZAJ7c,7999
|
|
16
|
+
pycorpdiff/collocation/shift.py,sha256=3QdGX8TncpYB0EsvMXvcAovacv6ctc9uF-ASPrq-Q8k,4834
|
|
17
|
+
pycorpdiff/datasets/__init__.py,sha256=F2e4SoZNTM7eKcZxVs8b-7lg7T6YNm7O0uaRa-ufb1c,979
|
|
18
|
+
pycorpdiff/datasets/_generate_hansard.py,sha256=Eqs4pZHIaxz52TK62500KxsyPUAvFLkFOkIeMPfnSd4,10717
|
|
19
|
+
pycorpdiff/datasets/hansard.py,sha256=Xf49UKfMhWmw19-8bMCLy-NGzZ0p4qfKWzZVXS7Gk4Y,8523
|
|
20
|
+
pycorpdiff/datasets/histwords.py,sha256=IdMCuIFLq63gqCBIR1fFSvnUFSeXh6iNreK6zuFOYsA,8939
|
|
21
|
+
pycorpdiff/datasets/_data/hansard_sample.parquet,sha256=F19tKAmIEPdCT9noBJdlC5Nc-6YbNBC9aksEWx1Jcvo,14061
|
|
22
|
+
pycorpdiff/io/__init__.py,sha256=9DNFyjnZhZW9J5T0MUhKJ0aeWcRWpZMGF3GTGjI6FiE,384
|
|
23
|
+
pycorpdiff/io/duckdb.py,sha256=4vHtLawKn748JTEM1JK0VsUYVF2GrxVYeukruqqRyHc,3204
|
|
24
|
+
pycorpdiff/io/huggingface.py,sha256=UcgpENNvFeffiJjWw1oc9hl4WpUFDaZ7mFmaPmJQHzg,4716
|
|
25
|
+
pycorpdiff/io/readers.py,sha256=QmGbNCS9uytm0L_EwV0LfAOrid07UtmnOig2zj7c2Ho,4600
|
|
26
|
+
pycorpdiff/keyness/__init__.py,sha256=RMTcKE7kEn9CZzuOwmhKMu8uEvGlTb_iZF-p8o9Lz-Q,706
|
|
27
|
+
pycorpdiff/keyness/bayes.py,sha256=dSjcrLjtQkXcP32RvQPMtDDBBGB7nbHI0VpqXwJxELQ,1591
|
|
28
|
+
pycorpdiff/keyness/chi_squared.py,sha256=RCuul3_YK_QMkXe07dCUILikHHNtK6GzPHxfXv3SjKk,3355
|
|
29
|
+
pycorpdiff/keyness/correction.py,sha256=QAgyGhBcayU0rOOXZ2Pvgo5JC_dtYALWmFt4Qv8ZL5E,1183
|
|
30
|
+
pycorpdiff/keyness/dispersion.py,sha256=XymmPq-Ee8TM4Iek5bXES58dAJmID9U4yz-FxcYkF-Q,3881
|
|
31
|
+
pycorpdiff/keyness/effect_sizes.py,sha256=jkBzAhtbph6xgq2r_MtM7JD2Bevx4ZKhk_koRmy7wZU,2279
|
|
32
|
+
pycorpdiff/keyness/loglikelihood.py,sha256=8Kr5aJM9ButjLVwBoljHev-6wIQYuK_yo-IPlrqKd4w,3042
|
|
33
|
+
pycorpdiff/keyness/multicorpus.py,sha256=oBWzkHDL4RcKUABXu3YEUAqmPOftE0gSxvLDlzdGxDg,5120
|
|
34
|
+
pycorpdiff/keyness/permutation.py,sha256=yysrBa3vbrigEtWgvGg7EK4QDZLZsrdDAmxYe-K0aFw,5773
|
|
35
|
+
pycorpdiff/semantic/__init__.py,sha256=HUEn_o2q1iNtP8Ebn9Wc-HAiNALLqTODKsFuyet_5g4,465
|
|
36
|
+
pycorpdiff/semantic/alignment.py,sha256=k4wBmUWTXTc3laSzJPD997m37dVoQwA5aJIeTHsAaVI,2069
|
|
37
|
+
pycorpdiff/semantic/embed.py,sha256=d5mr_90TvKU3_di6PEJsnSQHeCyRWvS-RnsAvEsa0tE,3413
|
|
38
|
+
pycorpdiff/semantic/shift.py,sha256=fQArBeUxHy6lYDnsMif7w3L5SqCEC9n4hBFEFh1mu1M,7879
|
|
39
|
+
pycorpdiff/semantic/trajectory.py,sha256=AWVvTuFZGdWnSWag4Em_c7Nl026T-G1AKTtEfntfHyM,6101
|
|
40
|
+
pycorpdiff/temporal/__init__.py,sha256=U474af23-oq2tliJZoVVKzyaQevshxU9rjiPrKGHyT0,370
|
|
41
|
+
pycorpdiff/temporal/bocpd.py,sha256=pah0amEi7yADSotCGYoy2Bzo_Dyc_SA3SHndIQ_pQig,8374
|
|
42
|
+
pycorpdiff/temporal/causal_impact.py,sha256=29WNH4f33cXA-XdnuUH3gzPBsNLSkveyRECQ4Coa6HU,11105
|
|
43
|
+
pycorpdiff/temporal/changepoint.py,sha256=ZoyryX5sOlfbnEPawV1Q6XPLvMMSJ4EqFc30quLQ5LY,3159
|
|
44
|
+
pycorpdiff/temporal/forecast.py,sha256=NQih5VDs7p4lwL_Yy1kaFHZUE43r7pEd5Ibbyt1k4zo,13801
|
|
45
|
+
pycorpdiff/temporal/its.py,sha256=t_YAyPe_XotR96e4W7ODKIDCZ80vDyXUf2n0o44DwFA,4396
|
|
46
|
+
pycorpdiff/temporal/slicing.py,sha256=njsKN1DuSdbBKE5DYypUa2F_UMxPVgRpKtU-IJeaDlE,6372
|
|
47
|
+
pycorpdiff/viz/__init__.py,sha256=qy4Cxad3gkiULnkbohN0ecIfMEWiZ3ysnkedTiBxbqw,1197
|
|
48
|
+
pycorpdiff/viz/bocpd.py,sha256=vlIIPOMmapNORUTGTkMLmetziJzZIalI_-E4ZPi5GKs,5804
|
|
49
|
+
pycorpdiff/viz/causal_impact.py,sha256=H3AVIO4Mv6sioflH8iSPOi5VUN4DDrThr9vnYlroj8Y,4909
|
|
50
|
+
pycorpdiff/viz/collocation.py,sha256=g7Lep6LrqrHkUNAHSjXd1WTUWzdQtaQdQwkdD3NpZRc,1365
|
|
51
|
+
pycorpdiff/viz/dispersion.py,sha256=8pPhJNuu_cTFkTK0Z9knGCvAqZ-KKPlHDGWONBjB32E,3846
|
|
52
|
+
pycorpdiff/viz/forecast.py,sha256=Za7G9pOTAAnWiRPld9lsNyTNhUp10CgqM34LlE8iM9E,4090
|
|
53
|
+
pycorpdiff/viz/keyness.py,sha256=wks5zWNcmm-2SFdG1ev6UVS-a8ySkmoAG4LCc48T_oM,3043
|
|
54
|
+
pycorpdiff/viz/network.py,sha256=W_GFsvFAGzA2FFQQXJOsXq7K7QQItSqH-2krWdRKwpw,6117
|
|
55
|
+
pycorpdiff/viz/scattertext.py,sha256=K64AGKO_-XBtbEML0JCCGUn25GUUBeyX9uQOW8qQDio,5375
|
|
56
|
+
pycorpdiff/viz/semantic_forecast.py,sha256=C4IHjNt4BC9pHNWzPDnQVQihI-HyfSjgsqDhx4U8dR4,3706
|
|
57
|
+
pycorpdiff/viz/trajectory.py,sha256=Dlrr-pJGDib7s67HMrhbtgls0dK1vxqfWizUzyNCFXQ,1538
|
|
58
|
+
pycorpdiff-0.1.0a0.dist-info/METADATA,sha256=He0-iGnk-nRDRsR7rd60GRo79nU1bqFlLd0_IXFcvAA,11392
|
|
59
|
+
pycorpdiff-0.1.0a0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
60
|
+
pycorpdiff-0.1.0a0.dist-info/licenses/LICENSE,sha256=ejByysE4yqPsBN9CLSCSuK85QFzY979kJ7fIR8R_J7U,1069
|
|
61
|
+
pycorpdiff-0.1.0a0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jason Turner
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|