openstat-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. openstat/__init__.py +3 -0
  2. openstat/__main__.py +4 -0
  3. openstat/backends/__init__.py +16 -0
  4. openstat/backends/duckdb_backend.py +70 -0
  5. openstat/backends/polars_backend.py +52 -0
  6. openstat/cli.py +92 -0
  7. openstat/commands/__init__.py +82 -0
  8. openstat/commands/adv_stat_cmds.py +1255 -0
  9. openstat/commands/advanced_ml_cmds.py +576 -0
  10. openstat/commands/advreg_cmds.py +207 -0
  11. openstat/commands/alias_cmds.py +135 -0
  12. openstat/commands/arch_cmds.py +82 -0
  13. openstat/commands/arules_cmds.py +111 -0
  14. openstat/commands/automodel_cmds.py +212 -0
  15. openstat/commands/backend_cmds.py +82 -0
  16. openstat/commands/base.py +170 -0
  17. openstat/commands/bayes_cmds.py +71 -0
  18. openstat/commands/causal_cmds.py +269 -0
  19. openstat/commands/cluster_cmds.py +152 -0
  20. openstat/commands/data_cmds.py +996 -0
  21. openstat/commands/datamanip_cmds.py +672 -0
  22. openstat/commands/dataquality_cmds.py +174 -0
  23. openstat/commands/datetime_cmds.py +176 -0
  24. openstat/commands/dimreduce_cmds.py +184 -0
  25. openstat/commands/discrete_cmds.py +149 -0
  26. openstat/commands/dsl_cmds.py +143 -0
  27. openstat/commands/epi_cmds.py +93 -0
  28. openstat/commands/equiv_tobit_cmds.py +94 -0
  29. openstat/commands/esttab_cmds.py +196 -0
  30. openstat/commands/export_beamer_cmds.py +142 -0
  31. openstat/commands/export_cmds.py +201 -0
  32. openstat/commands/export_extra_cmds.py +240 -0
  33. openstat/commands/factor_cmds.py +180 -0
  34. openstat/commands/groupby_cmds.py +155 -0
  35. openstat/commands/help_cmds.py +237 -0
  36. openstat/commands/i18n_cmds.py +43 -0
  37. openstat/commands/import_extra_cmds.py +561 -0
  38. openstat/commands/influence_cmds.py +134 -0
  39. openstat/commands/iv_cmds.py +106 -0
  40. openstat/commands/manova_cmds.py +105 -0
  41. openstat/commands/mediate_cmds.py +233 -0
  42. openstat/commands/meta_cmds.py +284 -0
  43. openstat/commands/mi_cmds.py +228 -0
  44. openstat/commands/mixed_cmds.py +79 -0
  45. openstat/commands/mixture_changepoint_cmds.py +166 -0
  46. openstat/commands/ml_adv_cmds.py +147 -0
  47. openstat/commands/ml_cmds.py +178 -0
  48. openstat/commands/model_eval_cmds.py +142 -0
  49. openstat/commands/network_cmds.py +288 -0
  50. openstat/commands/nlquery_cmds.py +161 -0
  51. openstat/commands/nonparam_cmds.py +149 -0
  52. openstat/commands/outreg_cmds.py +247 -0
  53. openstat/commands/panel_cmds.py +141 -0
  54. openstat/commands/pdf_cmds.py +226 -0
  55. openstat/commands/pipeline_cmds.py +319 -0
  56. openstat/commands/plot_cmds.py +189 -0
  57. openstat/commands/plugin_cmds.py +79 -0
  58. openstat/commands/posthoc_cmds.py +153 -0
  59. openstat/commands/power_cmds.py +172 -0
  60. openstat/commands/profile_cmds.py +246 -0
  61. openstat/commands/rbridge_cmds.py +81 -0
  62. openstat/commands/regex_cmds.py +104 -0
  63. openstat/commands/report_cmds.py +48 -0
  64. openstat/commands/repro_cmds.py +129 -0
  65. openstat/commands/resampling_cmds.py +109 -0
  66. openstat/commands/reshape_cmds.py +223 -0
  67. openstat/commands/sem_cmds.py +177 -0
  68. openstat/commands/stat_cmds.py +1040 -0
  69. openstat/commands/stata_import_cmds.py +215 -0
  70. openstat/commands/string_cmds.py +124 -0
  71. openstat/commands/surv_cmds.py +145 -0
  72. openstat/commands/survey_cmds.py +153 -0
  73. openstat/commands/textanalysis_cmds.py +192 -0
  74. openstat/commands/ts_adv_cmds.py +136 -0
  75. openstat/commands/ts_cmds.py +195 -0
  76. openstat/commands/tui_cmds.py +111 -0
  77. openstat/commands/ux_cmds.py +191 -0
  78. openstat/commands/validate_cmds.py +270 -0
  79. openstat/commands/viz_adv_cmds.py +312 -0
  80. openstat/commands/viz_extra_cmds.py +251 -0
  81. openstat/commands/watch_cmds.py +69 -0
  82. openstat/config.py +106 -0
  83. openstat/dsl/__init__.py +0 -0
  84. openstat/dsl/parser.py +332 -0
  85. openstat/dsl/tokenizer.py +105 -0
  86. openstat/i18n.py +120 -0
  87. openstat/io/__init__.py +0 -0
  88. openstat/io/loader.py +187 -0
  89. openstat/jupyter/__init__.py +18 -0
  90. openstat/jupyter/display.py +18 -0
  91. openstat/jupyter/magic.py +60 -0
  92. openstat/logging_config.py +59 -0
  93. openstat/plots/__init__.py +0 -0
  94. openstat/plots/plotter.py +437 -0
  95. openstat/plots/surv_plots.py +32 -0
  96. openstat/plots/ts_plots.py +59 -0
  97. openstat/plugins/__init__.py +5 -0
  98. openstat/plugins/manager.py +69 -0
  99. openstat/repl.py +457 -0
  100. openstat/reporting/__init__.py +0 -0
  101. openstat/reporting/eda.py +208 -0
  102. openstat/reporting/report.py +67 -0
  103. openstat/script_runner.py +319 -0
  104. openstat/session.py +133 -0
  105. openstat/stats/__init__.py +0 -0
  106. openstat/stats/advanced_regression.py +269 -0
  107. openstat/stats/arch_garch.py +84 -0
  108. openstat/stats/bayesian.py +103 -0
  109. openstat/stats/causal.py +258 -0
  110. openstat/stats/clustering.py +206 -0
  111. openstat/stats/discrete.py +311 -0
  112. openstat/stats/epidemiology.py +119 -0
  113. openstat/stats/equiv_tobit.py +163 -0
  114. openstat/stats/factor.py +174 -0
  115. openstat/stats/imputation.py +282 -0
  116. openstat/stats/influence.py +78 -0
  117. openstat/stats/iv.py +131 -0
  118. openstat/stats/manova.py +124 -0
  119. openstat/stats/mixed.py +128 -0
  120. openstat/stats/ml.py +275 -0
  121. openstat/stats/ml_advanced.py +117 -0
  122. openstat/stats/model_eval.py +183 -0
  123. openstat/stats/models.py +1342 -0
  124. openstat/stats/nonparametric.py +130 -0
  125. openstat/stats/panel.py +179 -0
  126. openstat/stats/power.py +295 -0
  127. openstat/stats/resampling.py +203 -0
  128. openstat/stats/survey.py +213 -0
  129. openstat/stats/survival.py +196 -0
  130. openstat/stats/timeseries.py +142 -0
  131. openstat/stats/ts_advanced.py +114 -0
  132. openstat/types.py +11 -0
  133. openstat/web/__init__.py +1 -0
  134. openstat/web/app.py +117 -0
  135. openstat/web/session_manager.py +73 -0
  136. openstat/web/static/app.js +117 -0
  137. openstat/web/static/index.html +38 -0
  138. openstat/web/static/style.css +103 -0
  139. openstat_cli-1.0.0.dist-info/METADATA +748 -0
  140. openstat_cli-1.0.0.dist-info/RECORD +143 -0
  141. openstat_cli-1.0.0.dist-info/WHEEL +4 -0
  142. openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
  143. openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
openstat/config.py ADDED
@@ -0,0 +1,106 @@
1
+ """Configuration management for OpenStat.
2
+
3
+ Loads settings from ~/.openstat/config.toml (if exists) with sensible defaults.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+
11
# Location of the optional per-user configuration file.
_CONFIG_DIR = Path.home() / ".openstat"
_CONFIG_FILE = _CONFIG_DIR / "config.toml"


@dataclass
class Config:
    """OpenStat configuration with defaults.

    Every field has a default, so ``Config()`` is always usable.
    ``Config.load()`` overlays values read from ``_CONFIG_FILE`` on top of
    those defaults.
    """

    # Data
    output_dir: str = "outputs"
    csv_separator: str = ","
    infer_schema_length: int = 10_000

    # Display
    tabulate_limit: int = 50
    head_default: int = 10

    # Undo
    max_undo_stack: int = 20
    max_undo_memory_mb: int = 500  # adaptive: skip snapshots if exceeds

    # Plotting
    plot_dpi: int = 150
    plot_figsize_w: float = 8.0
    plot_figsize_h: float = 5.0
    plot_style: str = "default"

    # Model
    condition_threshold: int = 30
    min_obs_per_predictor: int = 5
    bootstrap_iterations: int = 1000

    @classmethod
    def load(cls) -> "Config":
        """Load config from the TOML file, falling back to defaults.

        Returns a default ``Config`` when the file does not exist, when no
        TOML parser can be imported, or when the file cannot be read or
        parsed. Individual values of the wrong type are skipped so one bad
        entry does not discard the rest of the file.
        """
        cfg = cls()
        if not _CONFIG_FILE.exists():
            return cfg

        try:
            import tomllib
        except ImportError:
            try:
                import tomli as tomllib  # type: ignore[no-redef]
            except ImportError:
                return cfg  # no TOML parser available, use defaults

        try:
            with open(_CONFIG_FILE, "rb") as f:
                data = tomllib.load(f)
        except Exception:
            # Deliberately best-effort: an unreadable or malformed config
            # file must never prevent the program from starting.
            return cfg

        cls._apply_overrides(cfg, cls._flatten(data))
        return cfg

    @staticmethod
    def _flatten(data: dict[str, object]) -> dict[str, object]:
        """Flatten one level of TOML tables into ``section_key`` names.

        ``{"plot": {"dpi": 200}, "output_dir": "x"}`` becomes
        ``{"plot_dpi": 200, "output_dir": "x"}``.
        """
        flat: dict[str, object] = {}
        for section_key, section_val in data.items():
            if isinstance(section_val, dict):
                for k, v in section_val.items():
                    flat[f"{section_key}_{k}"] = v
            else:
                flat[section_key] = section_val
        return flat

    @staticmethod
    def _coerce(value: object, default: object) -> object | None:
        """Return *value* converted to the type of *default*.

        Returns ``None`` when the value is not compatible with the field;
        no field accepts ``None``, so it is an unambiguous "reject" marker.
        """
        # bool is a subclass of int, but no field is boolean: a stray
        # ``true`` must not silently become 1.
        if isinstance(value, bool):
            return None
        if isinstance(default, float):
            if isinstance(value, (int, float)):
                try:
                    return float(value)
                except OverflowError:
                    return None
            return None
        if isinstance(default, int):
            if isinstance(value, int):
                return value
            # Accept 200.0 for an integer field, but not 200.5.
            if isinstance(value, float) and value.is_integer():
                return int(value)
            return None
        if isinstance(default, str):
            return value if isinstance(value, str) else None
        return None

    @staticmethod
    def _apply_overrides(cfg: "Config", flat: dict[str, object]) -> None:
        """Copy known, correctly typed keys from *flat* onto *cfg* in place.

        Keys that are not dataclass fields are ignored, as are values whose
        type does not match the field's default.
        """
        for key in cfg.__dataclass_fields__:
            if key not in flat:
                continue
            coerced = Config._coerce(flat[key], getattr(cfg, key))
            if coerced is not None:
                setattr(cfg, key, coerced)
82
+
83
+
84
+ # Singleton — created lazily on the first get_config() call, not at import time
85
_config: Config | None = None


def get_config() -> Config:
    """Return the shared configuration, loading it on first use.

    The first call stores the result of ``Config.load()`` in the
    module-level ``_config``; later calls return that same object.
    """
    global _config
    if _config is not None:
        return _config
    _config = Config.load()
    return _config
94
+
95
+
96
def reset_config(override: Config | None = None) -> Config:
    """Replace the global config singleton and return the new value.

    When *override* is supplied it is installed as-is. Otherwise a fresh
    ``Config()`` with default values is installed; the config file is not
    read.

    Intended for tests that need isolation from each other.
    """
    global _config
    if override is None:
        override = Config()
    _config = override
    return _config
File without changes
openstat/dsl/parser.py ADDED
@@ -0,0 +1,332 @@
1
+ """Safe recursive-descent parser: expression string -> Polars expression.
2
+
3
+ Grammar:
4
+ expr -> or_expr
5
+ or_expr -> and_expr ('or' and_expr)*
6
+ and_expr -> not_expr ('and' not_expr)*
7
+ not_expr -> 'not' not_expr | compare
8
+ compare -> add (comp_op add)?
9
+ add -> mul (('+' | '-') mul)*
10
+ mul -> power (('*' | '/' | '%') power)*
11
+ power -> unary ('**' unary)?
12
+ unary -> '-' unary | atom
13
+ atom -> NUMBER | STRING | func_call | IDENT | '(' expr ')'
14
+ func_call -> IDENT '(' args? ')'
15
+ args -> expr (',' expr)*
16
+
17
+ Produces a polars.Expr. No Python eval is ever used.
18
+
19
+ Supported functions (whitelisted):
20
+ Math: log, log10, sqrt, abs, round, exp
21
+ String: upper, lower, len_chars, strip, contains
22
+ Null: is_null, is_not_null, fill_null
23
+ Type: cast_float, cast_int, cast_str
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import math
29
+
30
+ import polars as pl
31
+
32
+ from openstat.dsl.tokenizer import TT, Token, tokenize
33
+
34
+
35
class ParseError(Exception):
    """Raised when an expression or formula string cannot be parsed."""
37
+
38
+
39
+ # ── Whitelisted functions ────────────────────────────────────────────
40
+
41
# Whitelisted one-argument functions: name -> builder for the result.
_UNARY_FUNCTIONS = {
    # Math
    "log": lambda e: e.log(math.e),
    "log10": lambda e: e.log(10),
    "sqrt": lambda e: e.sqrt(),
    "abs": lambda e: e.abs(),
    "exp": lambda e: e.exp(),
    # String (operate on the column)
    "upper": lambda e: e.str.to_uppercase(),
    "lower": lambda e: e.str.to_lowercase(),
    "len_chars": lambda e: e.str.len_chars(),
    "strip": lambda e: e.str.strip_chars(),
    # Null checks
    "is_null": lambda e: e.is_null(),
    "is_not_null": lambda e: e.is_not_null(),
    # Casts
    "cast_float": lambda e: e.cast(pl.Float64),
    "cast_int": lambda e: e.cast(pl.Int64),
    "cast_str": lambda e: e.cast(pl.Utf8),
}

# Whitelisted two-argument functions.
_BINARY_FUNCTIONS = {
    "contains": lambda e, pattern: e.str.contains(pattern),
    "fill_null": lambda e, value: e.fill_null(value),
}


def _apply_function(name: str, args: list[pl.Expr]) -> pl.Expr:
    """Apply a whitelisted function to Polars expressions.

    Args:
        name: Function name as written in the expression.
        args: Already-parsed argument expressions, in call order.

    Returns:
        The Polars expression representing the call.

    Raises:
        ParseError: If *name* is not whitelisted, if a whitelisted function
            is called with the wrong number of arguments, or if the second
            argument of ``round()`` is not a literal integer.
    """
    argc = len(args)

    # round() is the only function with an optional argument.
    if name == "round" and argc in (1, 2):
        decimals = 0
        if argc == 2:
            try:
                # Evaluate the literal expression to get the integer value;
                # anything that needs a column fails here.
                decimals = int(pl.select(args[1]).item())
            except Exception as err:
                raise ParseError(
                    "round() second argument must be a literal integer"
                ) from err
        return args[0].round(decimals)

    if argc == 1 and name in _UNARY_FUNCTIONS:
        return _UNARY_FUNCTIONS[name](args[0])
    if argc == 2 and name in _BINARY_FUNCTIONS:
        return _BINARY_FUNCTIONS[name](args[0], args[1])

    # A known name reaching this point was called with the wrong arity;
    # say so rather than claiming the function does not exist.
    if name == "round":
        expected = "1 or 2"
    elif name in _UNARY_FUNCTIONS:
        expected = "1"
    elif name in _BINARY_FUNCTIONS:
        expected = "2"
    else:
        expected = None
    if expected is not None:
        raise ParseError(
            f"Function '{name}' expects {expected} argument(s), got {argc}"
        )

    available = (
        "log, log10, sqrt, abs, exp, round, "
        "upper, lower, len_chars, strip, contains, "
        "is_null, is_not_null, fill_null, "
        "cast_float, cast_int, cast_str"
    )
    raise ParseError(
        f"Unknown function '{name}' with {argc} argument(s). "
        f"Available: {available}"
    )
103
+
104
+
105
+ # ── Parser ───────────────────────────────────────────────────────────
106
+
107
class _Parser:
    """Recursive-descent parser that turns a token list into a ``pl.Expr``.

    Each grammar rule is one method, ordered from lowest to highest
    precedence: or, and, not, comparison, additive, multiplicative, power,
    unary minus, atom. The token list is expected to end with a ``TT.EOF``
    token, which ``tokenize`` always appends.
    """

    # Comparison operator -> builder. Looked up once the operator is known
    # so that only the requested comparison expression is constructed.
    _COMPARISONS = {
        ">": lambda lhs, rhs: lhs > rhs,
        "<": lambda lhs, rhs: lhs < rhs,
        ">=": lambda lhs, rhs: lhs >= rhs,
        "<=": lambda lhs, rhs: lhs <= rhs,
        "==": lambda lhs, rhs: lhs == rhs,
        "!=": lambda lhs, rhs: lhs != rhs,
    }

    def __init__(self, tokens: list[Token]) -> None:
        self.tokens = tokens
        self.pos = 0  # index of the next token to consume

    # -- helpers ---------------------------------------------------------

    def _peek(self) -> Token:
        """Return the current token without consuming it."""
        return self.tokens[self.pos]

    def _advance(self) -> Token:
        """Consume and return the current token."""
        tok = self.tokens[self.pos]
        self.pos += 1
        return tok

    def _expect(self, tt: TT, value: str | None = None) -> Token:
        """Consume the current token, requiring type *tt* (and *value* if given).

        Raises:
            ParseError: If the consumed token does not match.
        """
        tok = self._advance()
        if tok.type != tt or (value is not None and tok.value != value):
            # Only mention the value when the caller actually asked for one.
            wanted = tt.name if value is None else f"{tt.name} {value!r}"
            raise ParseError(f"Expected {wanted}, got {tok}")
        return tok

    def _match_op(self, *ops: str) -> str | None:
        """Consume the current token if it is one of the operators *ops*.

        Returns the matched operator text, or ``None`` (consuming nothing).
        """
        tok = self._peek()
        if tok.type == TT.OP and tok.value in ops:
            self._advance()
            return tok.value
        return None

    # -- grammar ---------------------------------------------------------

    def parse(self) -> pl.Expr:
        """Parse the whole token list; trailing tokens are an error."""
        expr = self._or_expr()
        if self._peek().type != TT.EOF:
            raise ParseError(f"Unexpected token: {self._peek()}")
        return expr

    def _or_expr(self) -> pl.Expr:
        """or_expr -> and_expr ('or' and_expr)*"""
        left = self._and_expr()
        while self._peek().type == TT.OR:
            self._advance()
            right = self._and_expr()
            left = left | right
        return left

    def _and_expr(self) -> pl.Expr:
        """and_expr -> not_expr ('and' not_expr)*"""
        left = self._not_expr()
        while self._peek().type == TT.AND:
            self._advance()
            right = self._not_expr()
            left = left & right
        return left

    def _not_expr(self) -> pl.Expr:
        """not_expr -> 'not' not_expr | compare"""
        if self._peek().type == TT.NOT:
            self._advance()
            return ~self._not_expr()
        return self._compare()

    def _compare(self) -> pl.Expr:
        """compare -> add (comp_op add)?  (comparisons do not chain)"""
        left = self._add()
        op = self._match_op(">", "<", ">=", "<=", "==", "!=")
        if op is None:
            return left
        right = self._add()
        return self._COMPARISONS[op](left, right)

    def _add(self) -> pl.Expr:
        """add -> mul (('+' | '-') mul)*"""
        left = self._mul()
        while True:
            op = self._match_op("+", "-")
            if op is None:
                break
            right = self._mul()
            left = left + right if op == "+" else left - right
        return left

    def _mul(self) -> pl.Expr:
        """mul -> power (('*' | '/' | '%') power)*"""
        left = self._power()
        while True:
            op = self._match_op("*", "/", "%")
            if op is None:
                break
            right = self._power()
            if op == "*":
                left = left * right
            elif op == "/":
                left = left / right
            else:
                left = left % right
        return left

    def _power(self) -> pl.Expr:
        """power -> unary ('**' unary)?"""
        base = self._unary()
        if self._match_op("**"):
            exp = self._unary()
            return base.pow(exp)
        return base

    def _unary(self) -> pl.Expr:
        """unary -> '-' unary | atom"""
        if self._match_op("-"):
            return -self._unary()
        return self._atom()

    def _atom(self) -> pl.Expr:
        """atom -> NUMBER | STRING | func_call | IDENT | '(' expr ')'"""
        tok = self._peek()

        if tok.type == TT.NUMBER:
            self._advance()
            # A decimal point selects a float literal, otherwise an int.
            val = float(tok.value) if "." in tok.value else int(tok.value)
            return pl.lit(val)

        if tok.type == TT.STRING:
            self._advance()
            return pl.lit(tok.value)

        if tok.type == TT.IDENT:
            # An identifier directly followed by '(' is a function call;
            # otherwise it names a column.
            next_pos = self.pos + 1
            if next_pos < len(self.tokens) and self.tokens[next_pos].type == TT.LPAREN:
                return self._func_call()
            self._advance()
            return pl.col(tok.value)

        if tok.type == TT.LPAREN:
            self._advance()
            expr = self._or_expr()
            self._expect(TT.RPAREN)
            return expr

        raise ParseError(f"Unexpected token: {tok}")

    def _func_call(self) -> pl.Expr:
        """Parse function_name(arg1, arg2, ...)."""
        name_tok = self._advance()  # IDENT
        self._expect(TT.LPAREN)

        args: list[pl.Expr] = []
        if self._peek().type != TT.RPAREN:
            args.append(self._or_expr())
            while self._peek().type == TT.COMMA:
                self._advance()  # skip comma
                args.append(self._or_expr())

        self._expect(TT.RPAREN)
        return _apply_function(name_tok.value, args)
260
+
261
+
262
def parse_expression(text: str) -> pl.Expr:
    """Convert an expression string to a Polars Expr without using eval.

    The text is tokenized and the resulting tokens are handed to the
    recursive-descent parser.
    """
    parser = _Parser(tokenize(text))
    return parser.parse()
266
+
267
+
268
def parse_formula(text: str) -> tuple[str, list[str]]:
    """Split a formula such as 'y ~ x1 + x2 + x3' into (dep_var, [indep_vars]).

    On the right-hand side '+' separates predictors; it is not arithmetic
    addition.

    Interaction syntax:
    - x1:x2 → interaction only (product term)
    - x1*x2 → full factorial = x1 + x2 + x1:x2

    Whitespace around ':' is removed and duplicate terms are dropped,
    keeping the first occurrence.

    Raises:
        ParseError: If '~' is missing, or either side of it is empty.
    """
    lhs, tilde, rhs = text.strip().partition("~")
    if not tilde:
        raise ParseError("Formula must contain '~', e.g. y ~ x1 + x2")

    dep = lhs.strip()
    if not dep:
        raise ParseError("Missing dependent variable before '~'")

    # Full-factorial terms must be expanded before splitting on '+'.
    expanded_rhs = _expand_star_interactions(rhs)
    stripped = (piece.strip() for piece in expanded_rhs.split("+"))
    terms = [term for term in stripped if term]
    if not terms:
        raise ParseError("Missing independent variables after '~'")

    # Joining on ':' after stripping each part leaves plain terms unchanged
    # and tidies interaction terms such as 'x1 : x2'.
    normalized = [
        ":".join(part.strip() for part in term.split(":")) for term in terms
    ]

    # dict.fromkeys keeps the first occurrence of each term, in order.
    return dep, list(dict.fromkeys(normalized))
308
+
309
+
310
def _expand_star_interactions(rhs: str) -> str:
    """Rewrite full-factorial ``*`` terms of a formula RHS as ``+`` terms.

    - ``x1*x2`` → ``x1 + x2 + x1:x2``
    - ``x1*x2*x3`` → ``x1 + x2 + x3 + x1:x2 + x1:x3 + x2:x3 + x1:x2:x3``

    Terms without ``*``, and terms that also contain ``:``, are passed
    through unchanged (apart from surrounding whitespace).
    """
    from itertools import combinations

    pieces: list[str] = []
    for raw in rhs.split("+"):
        term = raw.strip()
        if "*" not in term or ":" in term:
            pieces.append(term)
            continue
        factors = [factor.strip() for factor in term.split("*")]
        # Every non-empty subset of the factors, smallest subsets first;
        # a one-element subset joins to the bare factor name.
        for size in range(1, len(factors) + 1):
            pieces.extend(":".join(group) for group in combinations(factors, size))
    return " + ".join(pieces)
@@ -0,0 +1,105 @@
1
+ """Safe tokenizer for OpenStat expressions.
2
+
3
+ Produces a flat list of typed tokens from a string expression.
4
+ No Python eval is ever used.
5
+
6
+ Supports:
7
+ - Backtick-quoted identifiers: `Column Name`, `income ($)`
8
+ - Function calls: log(x), sqrt(x), is_null(x)
9
+ - Standard operators and boolean keywords
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import re
15
+ from dataclasses import dataclass
16
+ from enum import Enum, auto
17
+
18
+
19
class TT(Enum):
    """Kinds of token the tokenizer can emit."""

    # Literals and names
    NUMBER = 1
    STRING = 2
    IDENT = 3
    # Operators and punctuation
    OP = 4
    LPAREN = 5
    RPAREN = 6
    COMMA = 7
    # Boolean keywords
    AND = 8
    OR = 9
    NOT = 10
    # End-of-input marker
    EOF = 11
33
+
34
+
35
@dataclass(frozen=True)
class Token:
    """An immutable lexical token: its kind and its text."""

    type: TT
    value: str

    def __repr__(self) -> str:
        # Show the enum member name rather than the dataclass default repr.
        return "Token({}, {!r})".format(self.type.name, self.value)
42
+
43
+
44
# Order matters: alternatives are tried left to right, so longer operators
# (">=", "**") must come before their single-character prefixes.
_TOKEN_SPEC: list[tuple[TT | None, str]] = [
    (None, r"\s+"),  # whitespace, skipped
    (TT.NUMBER, r"\d+(?:\.\d+)?"),
    (TT.STRING, r'"[^"]*"|\'[^\']*\''),
    (TT.IDENT, r"`[^`]+`"),  # backtick-quoted identifiers
    (TT.OP, r">=|<=|!=|==|>|<|\+|-|\*\*|\*|/|%"),
    (TT.LPAREN, r"\("),
    (TT.RPAREN, r"\)"),
    (TT.COMMA, r","),
    (TT.IDENT, r"[A-Za-z_][A-Za-z0-9_]*"),
]

_KEYWORDS = {"and": TT.AND, "or": TT.OR, "not": TT.NOT}

# One named group per spec entry: group "G<i>" corresponds to _TOKEN_SPEC[i].
_PATTERN = re.compile(
    "|".join(f"(?P<G{i}>{pat})" for i, (_, pat) in enumerate(_TOKEN_SPEC))
)


def tokenize(text: str) -> list[Token]:
    """Split an expression string into Tokens, ending with an EOF token.

    Whitespace is dropped, quotes are stripped from string literals and
    backticks from quoted identifiers. Bare identifiers spelling ``and``,
    ``or`` or ``not`` (in any letter case) become keyword tokens whose
    value is lower-cased.

    Raises ValueError if the input contains unrecognized characters.
    """
    result: list[Token] = []
    cursor = 0  # end offset of the previous match
    for match in _PATTERN.finditer(text):
        start = match.start()
        # finditer silently skips text no alternative matches; a gap
        # between consecutive matches therefore means invalid input.
        if start > cursor:
            skipped = text[cursor:start]
            raise ValueError(
                f"Unexpected character(s) at position {cursor}: {skipped!r}"
            )
        cursor = match.end()

        # Exactly one "G<i>" group participates in each match; its index
        # selects the spec entry that produced it.
        kind = _TOKEN_SPEC[int(match.lastgroup[1:])][0]
        if kind is None:
            continue  # whitespace
        lexeme = match.group()

        if kind == TT.STRING:
            result.append(Token(kind, lexeme[1:-1]))  # strip quotes
        elif kind != TT.IDENT:
            result.append(Token(kind, lexeme))
        elif lexeme.startswith("`") and lexeme.endswith("`"):
            # Backtick-quoted names are never treated as keywords.
            result.append(Token(TT.IDENT, lexeme[1:-1]))
        else:
            lowered = lexeme.lower()
            if lowered in _KEYWORDS:
                result.append(Token(_KEYWORDS[lowered], lowered))
            else:
                result.append(Token(kind, lexeme))

    # Anything left after the last match is also unrecognized input.
    if cursor < len(text):
        remainder = text[cursor:]
        if remainder.strip():  # ignore trailing whitespace
            raise ValueError(
                f"Unexpected character(s) at position {cursor}: {remainder.strip()!r}"
            )
    result.append(Token(TT.EOF, ""))
    return result
openstat/i18n.py ADDED
@@ -0,0 +1,120 @@
1
+ """Lightweight i18n: translate UI strings for OpenStat.
2
+
3
+ Usage::
4
+
5
+ from openstat.i18n import t, set_locale
6
+ set_locale("tr")
7
+ print(t("no_data")) # → "Veri yüklenmedi."
8
+
9
+ Supported locales: en (default), tr.
10
+ Additional locales can be registered at runtime via ``register_locale()``.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
# Code of the locale currently used by t().
_LOCALE: str = "en"

# English strings; also the fallback for keys missing in other locales.
_EN_STRINGS: dict[str, str] = {
    # Generic errors
    "no_data": "No dataset loaded. Use: load <path>",
    "col_not_found": "Column not found: {col}",
    "unknown_subcmd": "Unknown sub-command: {subcmd}",
    # Data commands
    "load_ok": "Loaded {rows:,} rows × {cols} columns from {path}",
    "save_ok": "Saved to: {path}",
    "describe_header": "Dataset: {name} | {rows:,} rows × {cols} columns",
    "summarize_header": "Summary Statistics",
    # Model results
    "model_fitted": "{model} fitted. {info}",
    "model_none": "No model fitted yet.",
    # Export
    "export_docx_ok": "Word document saved: {path}",
    "export_pptx_ok": "PowerPoint saved: {path}",
    # Session
    "session_info_header": "Session Information",
    "seed_set": "Seed set to {seed}. Reproducible random operations enabled.",
    "seed_none": "No seed set.",
    # Dashboard
    "dashboard_closed": "Dashboard closed.",
    "dashboard_missing": (
        "textual is required for the dashboard.\n"
        "Install: pip install textual"
    ),
    # Misc
    "undo_ok": "Undo successful. Restored previous dataset.",
    "undo_fail": "Nothing to undo.",
}

# Turkish strings, with the same keys as the English table.
_TR_STRINGS: dict[str, str] = {
    # Generic errors
    "no_data": "Veri kümesi yüklenmedi. Kullanım: load <yol>",
    "col_not_found": "Sütun bulunamadı: {col}",
    "unknown_subcmd": "Bilinmeyen alt komut: {subcmd}",
    # Data commands
    "load_ok": "{path} dosyasından {rows:,} satır × {cols} sütun yüklendi",
    "save_ok": "Kaydedildi: {path}",
    "describe_header": "Veri kümesi: {name} | {rows:,} satır × {cols} sütun",
    "summarize_header": "Özet İstatistikler",
    # Model results
    "model_fitted": "{model} tahmin edildi. {info}",
    "model_none": "Henüz model tahmin edilmedi.",
    # Export
    "export_docx_ok": "Word belgesi kaydedildi: {path}",
    "export_pptx_ok": "PowerPoint kaydedildi: {path}",
    # Session
    "session_info_header": "Oturum Bilgisi",
    "seed_set": "Başlangıç değeri {seed} olarak ayarlandı. Tekrarlanabilir rastgele işlemler etkin.",
    "seed_none": "Başlangıç değeri ayarlanmadı.",
    # Dashboard
    "dashboard_closed": "Gösterge paneli kapatıldı.",
    "dashboard_missing": (
        "Gösterge paneli için textual gereklidir.\n"
        "Kurulum: pip install textual"
    ),
    # Misc
    "undo_ok": "Geri alma başarılı. Önceki veri kümesi geri yüklendi.",
    "undo_fail": "Geri alınacak bir şey yok.",
}

# Locale code -> (message key -> template). Insertion order is kept because
# set_locale() lists the available codes in this order.
_STRINGS: dict[str, dict[str, str]] = {
    "en": _EN_STRINGS,
    "tr": _TR_STRINGS,
}
79
+
80
+
81
def set_locale(locale: str) -> None:
    """Make *locale* (e.g. 'en', 'tr') the active locale.

    Raises:
        ValueError: If no strings are registered for *locale*; the message
            lists the available locale codes.
    """
    global _LOCALE
    if locale in _STRINGS:
        _LOCALE = locale
        return
    known = ", ".join(_STRINGS)
    raise ValueError(f"Locale '{locale}' not available. " f"Available: {known}")
90
+
91
+
92
def get_locale() -> str:
    """Report which locale code is active right now."""
    active = _LOCALE
    return active
95
+
96
+
97
def register_locale(locale: str, strings: dict[str, str]) -> None:
    """Add translations for *locale*, creating the locale if it is new.

    Entries in *strings* overwrite existing entries with the same key.
    Keys the locale does not define fall back to English when looked up.
    """
    _STRINGS.setdefault(locale, {}).update(strings)
105
+
106
+
107
def t(key: str, **kwargs: object) -> str:
    """Translate *key* using the active locale, with optional format args.

    Falls back to English if the key is missing (or empty) in the active
    locale, and to the key itself if it is missing everywhere.

    Args:
        key: Message identifier.
        **kwargs: Values for the template's named placeholders.

    Returns:
        The formatted message. If formatting fails for any reason (missing
        argument, positional placeholder, incompatible format spec or value
        type), the unformatted template is returned so a bad translation
        never crashes the caller.
    """
    locale_map = _STRINGS.get(_LOCALE, {})
    template = locale_map.get(key) or _STRINGS["en"].get(key) or key
    if not kwargs:
        return template
    try:
        return template.format(**kwargs)
    except (LookupError, ValueError, TypeError, AttributeError):
        # LookupError: missing named (KeyError) or positional (IndexError)
        # field. ValueError: malformed template or bad format spec for the
        # value. TypeError: value type lacks the format spec.
        # AttributeError: a "{a.b}" field on a value without that attribute.
        return template
File without changes