pycorpdiff 0.1.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. pycorpdiff/__init__.py +126 -0
  2. pycorpdiff/_backends/__init__.py +3 -0
  3. pycorpdiff/_backends/pandas.py +3 -0
  4. pycorpdiff/_backends/polars.py +3 -0
  5. pycorpdiff/collocation/__init__.py +19 -0
  6. pycorpdiff/collocation/cooccurrence.py +65 -0
  7. pycorpdiff/collocation/measures.py +102 -0
  8. pycorpdiff/collocation/network.py +233 -0
  9. pycorpdiff/collocation/shift.py +146 -0
  10. pycorpdiff/compare.py +345 -0
  11. pycorpdiff/corpus.py +411 -0
  12. pycorpdiff/datasets/__init__.py +27 -0
  13. pycorpdiff/datasets/_data/hansard_sample.parquet +0 -0
  14. pycorpdiff/datasets/_generate_hansard.py +221 -0
  15. pycorpdiff/datasets/hansard.py +235 -0
  16. pycorpdiff/datasets/histwords.py +221 -0
  17. pycorpdiff/explain.py +177 -0
  18. pycorpdiff/io/__init__.py +16 -0
  19. pycorpdiff/io/duckdb.py +92 -0
  20. pycorpdiff/io/huggingface.py +142 -0
  21. pycorpdiff/io/readers.py +138 -0
  22. pycorpdiff/keyness/__init__.py +26 -0
  23. pycorpdiff/keyness/bayes.py +50 -0
  24. pycorpdiff/keyness/chi_squared.py +94 -0
  25. pycorpdiff/keyness/correction.py +34 -0
  26. pycorpdiff/keyness/dispersion.py +89 -0
  27. pycorpdiff/keyness/effect_sizes.py +65 -0
  28. pycorpdiff/keyness/loglikelihood.py +92 -0
  29. pycorpdiff/keyness/multicorpus.py +143 -0
  30. pycorpdiff/keyness/permutation.py +154 -0
  31. pycorpdiff/py.typed +0 -0
  32. pycorpdiff/results.py +635 -0
  33. pycorpdiff/semantic/__init__.py +18 -0
  34. pycorpdiff/semantic/alignment.py +53 -0
  35. pycorpdiff/semantic/embed.py +84 -0
  36. pycorpdiff/semantic/shift.py +224 -0
  37. pycorpdiff/semantic/trajectory.py +166 -0
  38. pycorpdiff/stats.py +69 -0
  39. pycorpdiff/temporal/__init__.py +15 -0
  40. pycorpdiff/temporal/bocpd.py +233 -0
  41. pycorpdiff/temporal/causal_impact.py +293 -0
  42. pycorpdiff/temporal/changepoint.py +92 -0
  43. pycorpdiff/temporal/forecast.py +405 -0
  44. pycorpdiff/temporal/its.py +123 -0
  45. pycorpdiff/temporal/slicing.py +174 -0
  46. pycorpdiff/tokenize.py +110 -0
  47. pycorpdiff/viz/__init__.py +37 -0
  48. pycorpdiff/viz/bocpd.py +173 -0
  49. pycorpdiff/viz/causal_impact.py +142 -0
  50. pycorpdiff/viz/collocation.py +48 -0
  51. pycorpdiff/viz/dispersion.py +117 -0
  52. pycorpdiff/viz/forecast.py +129 -0
  53. pycorpdiff/viz/keyness.py +96 -0
  54. pycorpdiff/viz/network.py +186 -0
  55. pycorpdiff/viz/scattertext.py +160 -0
  56. pycorpdiff/viz/semantic_forecast.py +114 -0
  57. pycorpdiff/viz/trajectory.py +48 -0
  58. pycorpdiff-0.1.0a0.dist-info/METADATA +230 -0
  59. pycorpdiff-0.1.0a0.dist-info/RECORD +61 -0
  60. pycorpdiff-0.1.0a0.dist-info/WHEEL +4 -0
  61. pycorpdiff-0.1.0a0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,65 @@
1
+ """Effect-size measures for corpus comparison.
2
+
3
+ References
4
+ ----------
5
+ Hardie, A. (2014). Log Ratio: An informal introduction. Centre for Corpus
6
+ Approaches to Social Science (CASS).
7
+
8
+ Gabrielatos, C. (2018). Keyness analysis: Nature, metrics and techniques.
9
+ In *Corpus Approaches to Discourse* (pp. 225-258). Routledge.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import numpy as np
15
+ import pandas as pd
16
+
17
+
18
def log_ratio(
    counts_a: pd.Series,
    counts_b: pd.Series,
    total_a: int,
    total_b: int,
    smoothing: float = 0.5,
) -> pd.Series:
    """Hardie's LogRatio: ``log2((a+α)/N_a) - log2((b+α)/N_b)``.

    The additive ``smoothing`` constant α (default 0.5, per Hardie) is
    added to every count before normalisation so terms absent from one
    corpus yield a finite (rather than ``±inf``) effect size. Positive
    LogRatio means the term is more frequent in A; negative means more
    frequent in B.

    Parameters
    ----------
    counts_a, counts_b
        Term-frequency series indexed by term. They are aligned on the
        union of their indices; terms missing from one side count as 0.
    total_a, total_b
        Corpus sizes used as the rate denominators. Must be positive.
    smoothing
        Additive constant α; must be strictly positive.

    Returns
    -------
    pandas.Series
        Indexed by the union of terms, named ``log_ratio``.

    Raises
    ------
    ValueError
        If ``smoothing`` is not positive, or if either total is not
        positive (a zero or negative denominator would otherwise turn
        into silent ``inf``/``NaN`` values).
    """
    if smoothing <= 0:
        raise ValueError(f"smoothing must be > 0; got {smoothing}")
    if total_a <= 0 or total_b <= 0:
        raise ValueError(f"total_a and total_b must be positive; got {total_a}, {total_b}")

    terms = counts_a.index.union(counts_b.index)
    a = counts_a.reindex(terms, fill_value=0).astype(float)
    b = counts_b.reindex(terms, fill_value=0).astype(float)
    rate_a = (a + smoothing) / total_a
    rate_b = (b + smoothing) / total_b
    return pd.Series(np.log2(rate_a / rate_b), index=terms, name="log_ratio")
41
+
42
+
43
def percent_diff(
    counts_a: pd.Series,
    counts_b: pd.Series,
    total_a: int,
    total_b: int,
) -> pd.Series:
    """%DIFF — Gabrielatos's normalised percentage difference.

    Computed as ``(rate_a - rate_b) / rate_b * 100`` where rates are per
    million tokens. The per-million scaling is convention only; it
    cancels out of the ratio.

    Special values (no smoothing is applied here):

    * ``b == 0`` and ``a > 0`` gives ``+inf`` (the term is novel in A);
    * ``a == 0`` and ``b > 0`` gives ``-100``;
    * ``a == 0`` and ``b == 0`` gives ``NaN`` (0/0).

    The caller may wish to filter or replace the non-finite rows before
    plotting.

    Parameters
    ----------
    counts_a, counts_b
        Term-frequency series indexed by term. They are aligned on the
        union of their indices; terms missing from one side count as 0.
    total_a, total_b
        Corpus sizes used as the rate denominators. Must be positive.

    Returns
    -------
    pandas.Series
        Indexed by the union of terms, named ``percent_diff``.

    Raises
    ------
    ValueError
        If either total is not positive; a zero denominator would make
        every rate ``inf``/``NaN`` and the result meaningless.
    """
    if total_a <= 0 or total_b <= 0:
        raise ValueError(f"total_a and total_b must be positive; got {total_a}, {total_b}")

    terms = counts_a.index.union(counts_b.index)
    a = counts_a.reindex(terms, fill_value=0).astype(float)
    b = counts_b.reindex(terms, fill_value=0).astype(float)

    rate_a = a / total_a * 1_000_000
    rate_b = b / total_b * 1_000_000
    # Division by a zero rate is expected (novel terms); silence the warnings
    # and let the inf/NaN values flow through to the caller.
    with np.errstate(divide="ignore", invalid="ignore"):
        diff = (rate_a - rate_b) / rate_b * 100.0
    return pd.Series(diff, index=terms, name="percent_diff")
@@ -0,0 +1,92 @@
1
+ """Dunning's G² log-likelihood statistic.
2
+
3
+ Reference
4
+ ---------
5
+ Dunning, T. (1993). Accurate methods for the statistics of surprise and
6
+ coincidence. *Computational Linguistics*, 19(1), 61-74.
7
+
8
+ Notes
9
+ -----
10
+ The G² returned by :func:`log_likelihood` is **signed**: positive when the
11
+ term is overused in corpus A relative to corpus B (i.e. ``a/N_a > b/N_b``)
12
+ and negative when overused in B. This is the convention CASS / Lancaster
13
+ tooling has gravitated toward — it carries direction information without
14
+ needing a separate column. The reported *p*-value uses ``|G²|`` as the
15
+ test statistic; the unsigned form is what's chi-squared distributed.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import numpy as np
21
+ import pandas as pd
22
+ from scipy.special import xlogy
23
+ from scipy.stats import chi2
24
+
25
+
26
+ def log_likelihood(
27
+ counts_a: pd.Series,
28
+ counts_b: pd.Series,
29
+ total_a: int,
30
+ total_b: int,
31
+ ) -> pd.DataFrame:
32
+ """Compute Dunning G² for every term in the union of input indices.
33
+
34
+ ``counts_a`` and ``counts_b`` are aligned on their union; missing
35
+ terms are imputed as zero. No min-count filtering is applied here —
36
+ that is the caller's responsibility (see
37
+ :meth:`pycorpdiff.Comparison.keyness`).
38
+
39
+ Parameters
40
+ ----------
41
+ counts_a, counts_b
42
+ Term-frequency series. Index entries are terms; values are
43
+ non-negative integer counts.
44
+ total_a, total_b
45
+ Corpus totals (token counts before any min-count filter). Used
46
+ for the contingency-table "not-term" cells.
47
+
48
+ Returns
49
+ -------
50
+ pandas.DataFrame
51
+ Indexed by term, with columns ``count_a``, ``count_b``,
52
+ ``expected_a``, ``expected_b``, ``g2`` (signed), ``p_value``.
53
+ """
54
+ if total_a <= 0 or total_b <= 0:
55
+ raise ValueError(f"total_a and total_b must be positive; got {total_a}, {total_b}")
56
+
57
+ terms = counts_a.index.union(counts_b.index)
58
+ a = counts_a.reindex(terms, fill_value=0).astype(np.int64).to_numpy()
59
+ b = counts_b.reindex(terms, fill_value=0).astype(np.int64).to_numpy()
60
+
61
+ obs_sum = a + b
62
+ total = total_a + total_b
63
+ expected_a = total_a * obs_sum / total
64
+ expected_b = total_b * obs_sum / total
65
+
66
+ # 2 * sum_i O_i * ln(O_i / E_i), with xlogy giving 0*log(0)=0.
67
+ unsigned = 2.0 * (
68
+ xlogy(a, a) - xlogy(a, expected_a) + xlogy(b, b) - xlogy(b, expected_b)
69
+ )
70
+ # Mathematically G² >= 0; clip away the tiny negative values that
71
+ # surface from float roundoff when the two corpora have ~identical rates.
72
+ unsigned = np.maximum(unsigned, 0.0)
73
+
74
+ # Sign by direction of overuse: + when A's rate exceeds B's, else -.
75
+ a_rate = a / total_a
76
+ b_rate = b / total_b
77
+ sign = np.where(a_rate >= b_rate, 1.0, -1.0)
78
+ signed = sign * unsigned
79
+
80
+ p_value = chi2.sf(unsigned, df=1)
81
+
82
+ return pd.DataFrame(
83
+ {
84
+ "count_a": a,
85
+ "count_b": b,
86
+ "expected_a": expected_a,
87
+ "expected_b": expected_b,
88
+ "g2": signed,
89
+ "p_value": p_value,
90
+ },
91
+ index=terms,
92
+ )
@@ -0,0 +1,143 @@
1
+ """N-way keyness — does a term's rate differ across more than two corpora?
2
+
3
+ The two-corpus :func:`log_likelihood` answers "is this term distinctive
4
+ in A vs B". When you have three or more corpora — three news outlets,
5
+ five parties, ten decades — the natural generalisation is a one-way
6
+ contingency test: for each term, does its observed rate vary across
7
+ corpora more than chance would explain?
8
+
9
+ The reported G² uses the same marginal-only form as the two-way path
10
+ (:func:`pycorpdiff.keyness.loglikelihood.log_likelihood`), so the N=2
11
+ result matches the two-way statistic in magnitude (the two-way ``g2`` is signed, this one is not). Asymptotic
12
+ distribution is χ²(df=N−1).
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from collections.abc import Sequence
18
+
19
+ import numpy as np
20
+ import pandas as pd
21
+ from scipy.special import xlogy
22
+ from scipy.stats import chi2
23
+
24
+ from ..corpus import Corpus, CorpusSlice
25
+
26
+
27
def keyness_multi(
    corpora: Sequence[Corpus | CorpusSlice],
    *,
    labels: Sequence[str] | None = None,
    min_count: int = 5,
    multiple_comparisons: str = "bh",
    stop_words: set[str] | list[str] | None = None,
) -> pd.DataFrame:
    """Test for each term whether its rate varies across ``corpora``.

    For ``N`` corpora and each term in the union of their vocabularies:

    .. math::

        G^2 = 2 \\sum_{i=1}^{N} O_i \\ln\\!\\left(\\frac{O_i}{E_i}\\right),
        \\quad E_i = \\frac{(\\sum_j O_j)\\, n_i}{\\sum_j n_j}

    where :math:`O_i` is the term's count in corpus *i* and :math:`n_i`
    is corpus *i*'s total tokens. Asymptotic distribution is
    :math:`\\chi^2(\\text{df}=N-1)`.

    Parameters
    ----------
    corpora
        Two or more corpora (or slices) to compare side-by-side.
    labels
        Optional labels for the corpora; default ``["corpus_0",
        "corpus_1", ...]``. Used to name the per-corpus count columns.
    min_count
        Drop terms whose total count across all corpora is below this.
    multiple_comparisons
        ``"bh"`` (default), ``"bonferroni"``, or ``"none"``.
    stop_words
        Iterable of terms to exclude before scoring; same semantics as
        the two-way :meth:`Comparison.keyness` parameter.

    Returns
    -------
    pandas.DataFrame
        Indexed by term, sorted by G² descending. Columns: per-corpus
        ``count_<label>``, ``g2``, ``p_value``, and
        ``p_adjusted`` (unless ``multiple_comparisons="none"``).

    Raises
    ------
    ValueError
        If fewer than two corpora are given, ``multiple_comparisons`` is
        not a recognised method, ``labels`` does not have one entry per
        corpus, or any corpus has no tokens.
    """
    if len(corpora) < 2:
        raise ValueError(f"need at least 2 corpora; got {len(corpora)}")

    # Reject an unknown correction method before doing any work, and
    # regardless of whether any term survives filtering below.
    if multiple_comparisons not in ("bh", "bonferroni", "none"):
        raise ValueError(
            f"multiple_comparisons must be 'bh', 'bonferroni', or 'none'; "
            f"got {multiple_comparisons!r}"
        )

    if labels is None:
        labels = [f"corpus_{i}" for i in range(len(corpora))]
    if len(labels) != len(corpora):
        raise ValueError(
            f"labels must have one entry per corpus; "
            f"got {len(labels)} labels for {len(corpora)} corpora"
        )

    # Corpus totals are taken from the unfiltered (min_count=1) vocabularies.
    vocabs = [c.vocab(min_count=1) for c in corpora]
    totals = np.array([int(v.sum()) for v in vocabs], dtype=np.int64)
    if (totals == 0).any():
        which = [labels[i] for i, n in enumerate(totals) if n == 0]
        raise ValueError(f"empty corpora: {which}")

    all_terms = pd.Index([])
    for v in vocabs:
        all_terms = all_terms.union(v.index)

    # Rows are terms, columns are corpora; absent terms count as zero.
    counts_matrix = np.zeros((len(all_terms), len(corpora)), dtype=np.int64)
    for j, v in enumerate(vocabs):
        aligned = v.reindex(all_terms, fill_value=0).astype(np.int64).to_numpy()
        counts_matrix[:, j] = aligned

    keep_mask = counts_matrix.sum(axis=1) >= min_count
    if stop_words is not None:
        stop_set = set(stop_words)
        keep_mask &= ~all_terms.isin(stop_set)
    kept_terms = all_terms[keep_mask]
    kept_counts = counts_matrix[keep_mask]

    if len(kept_terms) == 0:
        # Nothing survived filtering: return an empty frame with the same
        # columns a populated result would have.
        empty = pd.DataFrame(
            {f"count_{label}": pd.Series(dtype=np.int64) for label in labels}
        )
        empty["g2"] = pd.Series(dtype=float)
        empty["p_value"] = pd.Series(dtype=float)
        if multiple_comparisons != "none":
            empty["p_adjusted"] = pd.Series(dtype=float)
        return empty.rename_axis("term")

    total_per_term = kept_counts.sum(axis=1).astype(np.float64)
    grand_total = float(totals.sum())
    # E[i, j] = (term i's pooled count) * (corpus j's share of all tokens).
    expected = total_per_term[:, None] * totals[None, :] / grand_total
    unsigned = 2.0 * (
        xlogy(kept_counts, kept_counts) - xlogy(kept_counts, expected)
    ).sum(axis=1)
    # Clip tiny negative values caused by float roundoff.
    unsigned = np.maximum(unsigned, 0.0)
    p_value = chi2.sf(unsigned, df=len(corpora) - 1)

    table = pd.DataFrame(
        {f"count_{label}": kept_counts[:, j] for j, label in enumerate(labels)},
        index=kept_terms,
    )
    table["g2"] = unsigned
    table["p_value"] = p_value

    if multiple_comparisons == "bh":
        from .correction import benjamini_hochberg

        table["p_adjusted"] = benjamini_hochberg(p_value)
    elif multiple_comparisons == "bonferroni":
        from .correction import bonferroni

        table["p_adjusted"] = bonferroni(p_value)

    return table.sort_values("g2", ascending=False).rename_axis("term")
@@ -0,0 +1,154 @@
1
+ """Permutation-test *p*-values for keyness.
2
+
3
+ The asymptotic χ²-based *p*-value reported by :func:`log_likelihood`
4
+ relies on the large-sample approximation Dunning warned about in 1993:
5
+ when expected cell counts are small (~< 5), the χ²(df=1) reference
6
+ distribution stops being faithful. A permutation test sidesteps this
7
+ entirely — it builds the null distribution empirically from the data
8
+ itself by repeatedly shuffling document labels and recomputing G².
9
+
10
+ The standard reference is Westfall & Young (1993), *Resampling-Based
11
+ Multiple Testing*. The "+1/+1" small-sample correction in the *p*-value
12
+ formula is Phipson & Smyth (2010); without it the smallest reportable
13
+ *p* is 0, which is misleading for any finite *B*.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import numpy as np
19
+ import pandas as pd
20
+ from scipy import sparse
21
+ from scipy.special import xlogy
22
+
23
+ from ..corpus import Corpus, CorpusSlice
24
+
25
+
26
+ def permutation_pvalues(
27
+ a: Corpus | CorpusSlice,
28
+ b: Corpus | CorpusSlice,
29
+ *,
30
+ terms: pd.Index | list[str] | None = None,
31
+ n_permutations: int = 999,
32
+ seed: int | None = None,
33
+ ) -> pd.Series:
34
+ """Compute empirical permutation *p*-values for per-term keyness.
35
+
36
+ For each permutation, the documents from both corpora are pooled,
37
+ randomly relabelled into two groups preserving the original
38
+ document counts ``|docs(a)|`` and ``|docs(b)|``, and a per-term G²
39
+ is recomputed against the new marginal counts. The reported
40
+ *p*-value for each term is the Phipson–Smyth (2010) empirical
41
+ fraction::
42
+
43
+ p = (#perms with |G²_perm| ≥ |G²_observed| + 1) / (B + 1)
44
+
45
+ Documents (rather than tokens) are the unit of exchangeability —
46
+ this matches the null "speeches are exchangeable between frames"
47
+ that most corpus-linguistic two-sample questions are really
48
+ testing.
49
+
50
+ Parameters
51
+ ----------
52
+ a, b
53
+ The two corpora (or slices) to test.
54
+ terms
55
+ Optional restriction to a subset of vocabulary terms. If
56
+ ``None`` (default) every term in the union of vocabularies is
57
+ scored.
58
+ n_permutations
59
+ Number of label permutations. 999 is the conventional default
60
+ — coarse enough to be fast, fine enough that the smallest
61
+ attainable *p* (= 1/1000 = 0.001) is below the usual 0.01
62
+ screening threshold.
63
+ seed
64
+ Optional random seed for reproducibility.
65
+
66
+ Returns
67
+ -------
68
+ pandas.Series
69
+ Indexed by term, named ``p_permutation``. Values in [1/(B+1), 1].
70
+ """
71
+ if n_permutations < 1:
72
+ raise ValueError(f"n_permutations must be >= 1; got {n_permutations}")
73
+
74
+ dtm_a = a.doc_term_counts(min_count=1)
75
+ dtm_b = b.doc_term_counts(min_count=1)
76
+
77
+ all_terms = (
78
+ dtm_a.columns.union(dtm_b.columns) if terms is None else pd.Index(list(terms))
79
+ )
80
+
81
+ a_aligned = dtm_a.reindex(columns=all_terms, fill_value=0).astype(np.int64)
82
+ b_aligned = dtm_b.reindex(columns=all_terms, fill_value=0).astype(np.int64)
83
+
84
+ a_matrix = sparse.csr_matrix(a_aligned.to_numpy())
85
+ b_matrix = sparse.csr_matrix(b_aligned.to_numpy())
86
+ stacked = sparse.vstack([a_matrix, b_matrix]).tocsr()
87
+
88
+ n_docs_a = a_matrix.shape[0]
89
+ n_docs_total = stacked.shape[0]
90
+
91
+ if n_docs_a == 0 or n_docs_a == n_docs_total:
92
+ raise ValueError(
93
+ "permutation_pvalues needs at least one document on each side; "
94
+ f"got |docs(a)|={n_docs_a}, |docs(b)|={n_docs_total - n_docs_a}"
95
+ )
96
+
97
+ doc_lens = np.asarray(stacked.sum(axis=1)).ravel().astype(np.int64)
98
+ full_counts = np.asarray(stacked.sum(axis=0)).ravel().astype(np.int64)
99
+ total_n = int(doc_lens.sum())
100
+
101
+ if total_n == 0:
102
+ raise ValueError("permutation_pvalues needs at least one token across both corpora")
103
+
104
+ a_marginal = np.asarray(a_matrix.sum(axis=0)).ravel().astype(np.int64)
105
+ b_marginal = full_counts - a_marginal
106
+ n_a_observed = int(a_marginal.sum())
107
+ n_b_observed = total_n - n_a_observed
108
+ observed_g2 = _g2_unsigned(a_marginal, b_marginal, n_a_observed, n_b_observed)
109
+ observed_abs = np.abs(observed_g2)
110
+
111
+ rng = np.random.default_rng(seed)
112
+ extreme = np.zeros(len(all_terms), dtype=np.int64)
113
+
114
+ for _ in range(n_permutations):
115
+ perm = rng.permutation(n_docs_total)
116
+ idx_a = perm[:n_docs_a]
117
+ # Vectorised marginal sum for the shuffled "a" assignment.
118
+ marg_a_perm = np.asarray(stacked[idx_a, :].sum(axis=0)).ravel().astype(np.int64)
119
+ marg_b_perm = full_counts - marg_a_perm
120
+ n_a_perm = int(doc_lens[idx_a].sum())
121
+ n_b_perm = total_n - n_a_perm
122
+ if n_a_perm == 0 or n_b_perm == 0:
123
+ # Degenerate permutation — all-zero docs landed in one side.
124
+ # Treat as null contribution of zero (no terms exceed observed).
125
+ continue
126
+ g2_perm = _g2_unsigned(marg_a_perm, marg_b_perm, n_a_perm, n_b_perm)
127
+ extreme += np.abs(g2_perm) >= observed_abs
128
+
129
+ p_perm = (extreme + 1.0) / (n_permutations + 1.0)
130
+ return pd.Series(p_perm, index=all_terms, name="p_permutation")
131
+
132
+
133
+ def _g2_unsigned(
134
+ a: np.ndarray,
135
+ b: np.ndarray,
136
+ n_a: int,
137
+ n_b: int,
138
+ ) -> np.ndarray:
139
+ """Vectorised unsigned G² for a vocabulary of terms.
140
+
141
+ Mirrors the math in :func:`pycorpdiff.keyness.loglikelihood.log_likelihood`
142
+ but operates on already-aligned numpy arrays and skips the sign
143
+ + p-value bookkeeping. Used internally by the permutation loop
144
+ where the per-iteration cost dominates.
145
+ """
146
+ obs_sum = a + b
147
+ total = n_a + n_b
148
+ expected_a = n_a * obs_sum / total
149
+ expected_b = n_b * obs_sum / total
150
+ unsigned = 2.0 * (
151
+ xlogy(a, a) - xlogy(a, expected_a) + xlogy(b, b) - xlogy(b, expected_b)
152
+ )
153
+ clipped: np.ndarray = np.maximum(unsigned, 0.0)
154
+ return clipped
pycorpdiff/py.typed ADDED
File without changes