cobra4 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. cobra4-0.1.0/PKG-INFO +352 -0
  2. cobra4-0.1.0/README.md +307 -0
  3. cobra4-0.1.0/cobra4/__init__.py +10 -0
  4. cobra4-0.1.0/cobra4/ast_nodes.py +455 -0
  5. cobra4-0.1.0/cobra4/cli.py +660 -0
  6. cobra4-0.1.0/cobra4/codegen.py +544 -0
  7. cobra4-0.1.0/cobra4/dispatch_analysis.py +257 -0
  8. cobra4-0.1.0/cobra4/grammar.lark +284 -0
  9. cobra4-0.1.0/cobra4/import_hook.py +168 -0
  10. cobra4-0.1.0/cobra4/lexer.py +159 -0
  11. cobra4-0.1.0/cobra4/lowering.py +26 -0
  12. cobra4-0.1.0/cobra4/parser.py +1015 -0
  13. cobra4-0.1.0/cobra4/plugins/__init__.py +32 -0
  14. cobra4-0.1.0/cobra4/plugins/api.py +61 -0
  15. cobra4-0.1.0/cobra4/plugins/builtin/__init__.py +1 -0
  16. cobra4-0.1.0/cobra4/plugins/builtin/regex.py +88 -0
  17. cobra4-0.1.0/cobra4/plugins/builtin/sql.py +144 -0
  18. cobra4-0.1.0/cobra4/plugins/builtin/yaml.py +87 -0
  19. cobra4-0.1.0/cobra4/plugins/loader.py +167 -0
  20. cobra4-0.1.0/cobra4/resolver.py +405 -0
  21. cobra4-0.1.0/cobra4/runtime/__init__.py +74 -0
  22. cobra4-0.1.0/cobra4/runtime/concurrency.py +50 -0
  23. cobra4-0.1.0/cobra4/runtime/core.py +164 -0
  24. cobra4-0.1.0/cobra4/runtime/deploy.py +507 -0
  25. cobra4-0.1.0/cobra4/runtime/fleet.py +315 -0
  26. cobra4-0.1.0/cobra4/runtime/io.py +349 -0
  27. cobra4-0.1.0/cobra4/runtime/observe.py +124 -0
  28. cobra4-0.1.0/cobra4/runtime/schedule.py +485 -0
  29. cobra4-0.1.0/cobra4/runtime/secrets.py +165 -0
  30. cobra4-0.1.0/cobra4/runtime/smart.py +312 -0
  31. cobra4-0.1.0/cobra4/source_map.py +93 -0
  32. cobra4-0.1.0/cobra4/stdlib/__init__.py +139 -0
  33. cobra4-0.1.0/cobra4/stdlib/cli.c4 +87 -0
  34. cobra4-0.1.0/cobra4/stdlib/data.c4 +155 -0
  35. cobra4-0.1.0/cobra4/stdlib/fs.c4 +87 -0
  36. cobra4-0.1.0/cobra4/stdlib/http.c4 +114 -0
  37. cobra4-0.1.0/cobra4/stdlib/json.c4 +19 -0
  38. cobra4-0.1.0/cobra4/stdlib/strings.c4 +73 -0
  39. cobra4-0.1.0/cobra4/stdlib/test.c4 +94 -0
  40. cobra4-0.1.0/cobra4/stdlib/time.c4 +83 -0
  41. cobra4-0.1.0/cobra4/test_runner.py +221 -0
  42. cobra4-0.1.0/cobra4/tools/__init__.py +1 -0
  43. cobra4-0.1.0/cobra4/tools/fmt.py +380 -0
  44. cobra4-0.1.0/cobra4/tools/lsp.py +464 -0
  45. cobra4-0.1.0/cobra4/tools/repl.py +173 -0
  46. cobra4-0.1.0/cobra4/typecheck.py +365 -0
  47. cobra4-0.1.0/cobra4.egg-info/PKG-INFO +352 -0
  48. cobra4-0.1.0/cobra4.egg-info/SOURCES.txt +65 -0
  49. cobra4-0.1.0/cobra4.egg-info/dependency_links.txt +1 -0
  50. cobra4-0.1.0/cobra4.egg-info/entry_points.txt +3 -0
  51. cobra4-0.1.0/cobra4.egg-info/requires.txt +24 -0
  52. cobra4-0.1.0/cobra4.egg-info/top_level.txt +1 -0
  53. cobra4-0.1.0/pyproject.toml +70 -0
  54. cobra4-0.1.0/setup.cfg +4 -0
  55. cobra4-0.1.0/tests/test_cli.py +74 -0
  56. cobra4-0.1.0/tests/test_codegen.py +71 -0
  57. cobra4-0.1.0/tests/test_examples.py +95 -0
  58. cobra4-0.1.0/tests/test_lexer.py +56 -0
  59. cobra4-0.1.0/tests/test_m2.py +110 -0
  60. cobra4-0.1.0/tests/test_m3.py +145 -0
  61. cobra4-0.1.0/tests/test_m4.py +70 -0
  62. cobra4-0.1.0/tests/test_m5.py +64 -0
  63. cobra4-0.1.0/tests/test_parser.py +241 -0
  64. cobra4-0.1.0/tests/test_real_implementations.py +368 -0
  65. cobra4-0.1.0/tests/test_review_fixes.py +326 -0
  66. cobra4-0.1.0/tests/test_runtime.py +203 -0
  67. cobra4-0.1.0/tests/test_waves.py +233 -0
cobra4-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,352 @@
1
+ Metadata-Version: 2.4
2
+ Name: cobra4
3
+ Version: 0.1.0
4
+ Summary: A high-level, cloud-native language transpiled to Python.
5
+ Author: cobra4 authors
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/cobra4-lang/cobra4
8
+ Project-URL: Repository, https://github.com/cobra4-lang/cobra4
9
+ Project-URL: Issues, https://github.com/cobra4-lang/cobra4/issues
10
+ Project-URL: Changelog, https://github.com/cobra4-lang/cobra4/releases
11
+ Keywords: language,transpiler,compiler,dsl,cloud,cloud-native,automation,lsp,smart-dispatch
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Software Development :: Code Generators
22
+ Classifier: Topic :: Software Development :: Compilers
23
+ Classifier: Topic :: Software Development :: Interpreters
24
+ Classifier: Topic :: System :: Distributed Computing
25
+ Requires-Python: >=3.11
26
+ Description-Content-Type: text/markdown
27
+ Requires-Dist: lark>=1.1.9
28
+ Requires-Dist: requests>=2.31
29
+ Provides-Extra: aws
30
+ Requires-Dist: boto3>=1.34; extra == "aws"
31
+ Provides-Extra: data
32
+ Requires-Dist: pandas>=2.1; extra == "data"
33
+ Requires-Dist: pyarrow>=15.0; extra == "data"
34
+ Provides-Extra: ssh
35
+ Requires-Dist: paramiko>=3.4; extra == "ssh"
36
+ Provides-Extra: yaml
37
+ Requires-Dist: pyyaml>=6.0; extra == "yaml"
38
+ Provides-Extra: otel
39
+ Requires-Dist: opentelemetry-api>=1.20; extra == "otel"
40
+ Requires-Dist: opentelemetry-sdk>=1.20; extra == "otel"
41
+ Requires-Dist: opentelemetry-exporter-otlp>=1.20; extra == "otel"
42
+ Provides-Extra: dev
43
+ Requires-Dist: pytest>=8.0; extra == "dev"
44
+ Requires-Dist: black>=24.0; extra == "dev"
45
+
46
+ <img src="docs/assets/logo-128.png" align="right" width="128" alt="cobra4 logo">
47
+
48
+ # cobra4
49
+
50
+ A high-level, cloud-native language transpiled to Python.
51
+
52
+ cobra4 promotes patterns common in cloud automation, data pipelines, and
53
+ distributed jobs to first-class language constructs. A single line of
54
+ cobra4 often replaces a small Python program.
55
+
56
+ ```cobra4
57
+ # read auto-detects format & source
58
+ users = read("./data/users.csv")
59
+ config = read("https://api.example.com/config.json")
60
+ adults = read("s3://acme/people.parquet")
61
+
62
+ # parallel fan-out, no boilerplate
63
+ results = each url in urls in parallel(workers=10) { fetch(url) }
64
+
65
+ # scheduling as a block
66
+ every 5 minutes {
67
+ ingest()
68
+ }
69
+
70
+ # pattern matching
71
+ match resp.status {
72
+ case 200 { handle(resp.json) }
73
+ case _ { log.warn("unexpected", code=resp.status) }
74
+ }
75
+ ```
76
+
77
+ ## Mantra
78
+
79
+ 1. **Readability first** — no esoteric operators (no `|>`), English keywords.
80
+ 2. **One line = one program** — cloud/distributed patterns are syntax.
81
+ 3. **General purpose** — anything Python does, cobra4 does.
82
+ 4. **Extensible** — *libraries* (extend the runtime) and *language plugins*
83
+ (extend the parser/AST) are distinct, both first-class.
84
+
85
+ ## Quick start
86
+
87
+ ```bash
88
+ pip install -e .[aws,data,dev]
89
+
90
+ c4 run examples/03_etl.c4 # transpile + execute
91
+ c4 build examples/03_etl.c4 -o etl.py # transpile only
92
+ c4 fmt examples/03_etl.c4 # canonical format
93
+ c4 check examples/03_etl.c4 # parse + scope check
94
+ c4 repl # interactive
95
+ ```
96
+
97
+ ## The cobra4 mentality: smart functions
98
+
99
+ Built-in and stdlib functions are **open dispatchers**. Their behavior
100
+ depends on argument type, URI scheme, file extension, and MIME content
101
+ type — and any library can extend them at boot or at runtime:
102
+
103
+ ```python
104
+ # Python side (in a library)
105
+ from cobra4.runtime.io import read
106
+ import yaml
107
+ read.register(yaml.safe_load, type=str, scheme="file", ext="yml", name="local-yaml")
108
+ ```
109
+
110
+ User cobra4 code can opt in to the same behavior with the `@smart` decorator:
111
+
112
+ ```cobra4
113
+ @smart
114
+ fn process(target) { return target }
115
+ process.register(scheme="s3", fn=fn(t) { ... })
116
+ process.register(type=DataFrame, fn=fn(df) { ... })
117
+ ```
118
+
119
+ Specificity wins. Ties at the same priority raise `AmbiguousDispatch`
120
+ on the first call — no silent fallbacks. See [`cobra4/runtime/smart.py`](cobra4/runtime/smart.py).
121
+
122
+ ## What's shipped
123
+
124
+ ### M1 — Core MVP (✅)
125
+
126
+ End-to-end working pipeline with a small but real surface:
127
+
128
+ - **Syntax**: brace-based blocks, English keywords, `?.` and `??` only.
129
+ - **Statements**: `if`/`elif`/`else`, `while`, `for`, `each ... (in parallel)?`,
130
+ `every`, `on event from`, `match`/`case`, `try`/`catch`/`finally`,
131
+ `serve`, `deploy`, `use`, `fn`, `class`.
132
+ - **Expressions**: full Python-like precedence; `?.` safe-nav; `??` default;
133
+ string interpolation `"hello {name}"`; lambdas `fn(x) = ...` / `fn(x) { ... }`.
134
+ - **Smart dispatch core** (`runtime/smart.py`): `SmartFn`, `@smart`,
135
+ `.register(...)`, type/scheme/ext/MIME/predicate matching, ambiguity
136
+ detection, resolution caching.
137
+ - **`read` / `save`**: smart-dispatch IO. Stdlib handlers for `csv`, `json`,
138
+ `jsonl`, `txt`, `md`, `parquet` × `./` `file://` `https://` `s3://`.
139
+ - **Concurrency**: `each ... in parallel { ... }` → thread/process pool fan-out.
140
+ - **CLI**: `c4 run | build | fmt | check | repl`. `serve` / `doc` / `deps` /
141
+ `plugin` were stubbed in M1 (implemented in the later milestones below).
142
+ - **Source map**: line-to-line cobra4↔Python; tracebacks point back to source.
143
+ - **Test suite**: ~80 tests covering lexer, parser, codegen, runtime,
144
+ dispatcher, CLI, and end-to-end execution of every example.
145
+
146
+ ### M2 — Types & lint (✅)
147
+
148
+ - **Resolver** with nested function/class/block scopes; warnings for
149
+ undefined names and outer-scope shadowing.
150
+ - **Gradual type checker** ([typecheck.py](cobra4/typecheck.py)):
151
+ honors annotations, infers literal/binop types, warns on call-site /
152
+ return-type / default mismatches. Stays advisory — code still runs.
153
+ - **Dispatch static analysis** ([dispatch_analysis.py](cobra4/dispatch_analysis.py)):
154
+ flags two `read.register(...)` (or any SmartFn) calls with overlapping
155
+ dispatch keys at the same priority, which would likely raise `AmbiguousDispatch` at runtime.
156
+ - `c4 check` wires all three together and accepts the flags `--strict`, `--no-types`, and `--no-shadowing`.
157
+
158
+ ### M3 — Cloud primitives (✅)
159
+
160
+ - **`fleet`** ([runtime/fleet.py](cobra4/runtime/fleet.py)): TOML inventory
161
+ (groups, glob patterns), `Host` dataclass, `run(cmd, host=...)` dispatching
162
+ to local subprocess or system `ssh`, `fan_out` parallel helper.
163
+ - **`secrets`** ([runtime/secrets.py](cobra4/runtime/secrets.py)): pluggable
164
+ backends — `env`, `file`, `vault` (hvac), `aws-sm` (boto3), `gcp-sm`.
165
+ Selection via `COBRA4_SECRETS_BACKEND` or `[secrets]` in `cobra4.toml`.
166
+ - **`deploy`** ([runtime/deploy.py](cobra4/runtime/deploy.py)): adapter
167
+ builder pattern (`aws.lambda(region="x")`), `register_adapter()`, dry-run
168
+ by default (set `COBRA4_DEPLOY_DRY_RUN=0` to go live), `env_from(".env")`.
169
+
170
+ ### M4 — Daemon & event loop (✅)
171
+
172
+ - **`c4 serve FILE`**: imports the module to register `every` / `on event` /
173
+ `serve` callbacks, then runs the scheduler + event poller + HTTP servers
174
+ in dedicated threads until SIGINT.
175
+ - **HTTP**: `serve handler on :8080` boots `ThreadingHTTPServer`, request
176
+ → `Request(method, path, params, headers, body)`, JSON-encode return value.
177
+ - **Queues**: built-in `InMemoryQueue` with `.put` / `.poll`; the
178
+ `EventSource` protocol (`poll(timeout)`) lets users plug in SQS/RabbitMQ/Kafka.
179
+
180
+ ### M5 — Language plugins (✅)
181
+
182
+ - **Plugin contract** ([plugins/api.py](cobra4/plugins/api.py)):
183
+ `LanguagePlugin(name, transform_source, runtime_module, builtins, description)`.
184
+ - **Loader** ([plugins/loader.py](cobra4/plugins/loader.py)) scans
185
+ `lang use NAME` directives at the top of a file, auto-imports
186
+ `cobra4.plugins.builtin.<name>` or `cobra4_lang_<name>`, and pre-processes
187
+ the source before the main parser.
188
+ - **Reference plugin** ([plugins/builtin/sql.py](cobra4/plugins/builtin/sql.py)):
189
+ rewrites `sql { SELECT ... }` blocks to `sql_run("...")` runtime calls.
190
+ - `c4 plugin list` prints registered plugins.
191
+
192
+ ### Post-M5 expansions (✅)
193
+
194
+ **Wave 1 — syntax fills**:
195
+ - Slice indexing: `xs[a:b]`, `xs[:b]`, `xs[a:]`, `xs[a:b:c]`, `xs[:]`.
196
+ - `where` filter on `each` and `for` (comprehension-style).
197
+ - OR-pattern (`case 1 | 2 | 3`) and guard (`case x if cond`) in `match`.
198
+
199
+ **Wave 2 — tooling (M6)**:
200
+ - Multi-line REPL ([tools/repl.py](cobra4/tools/repl.py)) with bracket-aware continuation.
201
+ - Canonical formatter ([tools/fmt.py](cobra4/tools/fmt.py)) — re-emits cobra4 from the AST, idempotent.
202
+ - LSP server ([tools/lsp.py](cobra4/tools/lsp.py)) on stdio: diagnostics from
203
+ parser+resolver+typecheck, formatting, hover with type info. Run with `c4 lsp`.
204
+
205
+ **Wave 3 — more language plugins**:
206
+ - `lang use regex` ([plugins/builtin/regex.py](cobra4/plugins/builtin/regex.py)) — `p = re"[a-z]+"i` literals.
207
+ - `lang use yaml` ([plugins/builtin/yaml.py](cobra4/plugins/builtin/yaml.py)) — inline YAML literals via `yaml"""..."""`.
208
+
209
+ **Wave 4 — cloud hardening**:
210
+ - Paramiko SSH backend with key-agent + sftp, fallback to system `ssh`. Install via `cobra4[ssh]`.
211
+ - AWS Lambda packaging: deterministic zip with vendored cobra4 runtime, idempotent create-or-update.
212
+ - OpenTelemetry log export: set `COBRA4_OTEL_EXPORT=1` after `pip install cobra4[otel]`.
213
+
214
+ **Wave 5 — stdlib + packaging (M7)**:
215
+ - `c4 deps add | list | remove | install` — manages `[deps]` in `cobra4.toml`, calls pip for install.
216
+ - `c4 doc FILE` — extracts docstrings + signatures to markdown.
217
+ - **Stdlib written in cobra4 itself**: [cobra4/stdlib/http.c4](cobra4/stdlib/http.c4),
218
+ [cobra4/stdlib/json.c4](cobra4/stdlib/json.c4), [cobra4/stdlib/fs.c4](cobra4/stdlib/fs.c4).
219
+ Loaded via a custom Python import hook with `__pycache__/*.cobra4.pyc` mtime-based caching —
220
+ re-import is a single file read, not a re-transpile.
221
+
222
+ ### Hardening pass (post-review)
223
+
224
+ A code-review pass identified 14 issues; the ones with runtime impact were
225
+ fixed and locked in with regression tests in [test_review_fixes.py](tests/test_review_fixes.py):
226
+
227
+ - **SmartFn cache bypassed when any handler uses `when=`** — the (type,
228
+ scheme, ext, mime) cache key is unsafe in the presence of value-dependent
229
+ predicates. The dispatcher now detects this and re-resolves on every call;
230
+ cache stays active when no handler uses `when=`.
231
+ - **`?.` resolves dict keys** — `req?.params?.name` works whether `params`
232
+ is an object (`getattr`) or a dict/Mapping (`.get`). Missing attribute
233
+ returns `None` rather than raising, so `??` composes cleanly.
234
+ - **`c4 fmt` is plugin-aware** — `lang use NAME` directives and
235
+ plugin-specific blocks (`sql {...}`, `re"..."`, `yaml"""..."""`) are
236
+ preserved verbatim through formatting via the new
237
+ `LanguagePlugin.preserve_for_format` hook.
238
+ - **`c4 check` knows plugin builtins** — the resolver now accepts
239
+ `extra_builtins` from the active plugins, so `sql_run` etc. don't get
240
+ flagged as undefined.
241
+ - **`dispatch_analysis` flags semantic overlaps** — not just identical
242
+ keys: also a generic handler subsuming a specific one at the same priority
243
+ (D002), and `when=` predicates with no other constraints (D003, since
244
+ they invalidate the cache for the whole SmartFn).
245
+ - **`fleet.run` is `shell=False` by default** — explicit opt-in with
246
+ `shell=True` for shell features. Argv form (list) is the recommended
247
+ path. Removes injection surface.
248
+ - **paramiko uses `RejectPolicy` by default** — unknown host keys are
249
+ rejected. Override per-host with `host_key_policy="auto"` in the
250
+ inventory `extra` map, or globally via `COBRA4_SSH_HOST_KEY_POLICY=auto`.
251
+ - **`save()` is atomic** — writes to a temp file in the target's directory,
252
+ fsyncs, then `os.replace()`. Crash mid-write leaves either old file or
253
+ new file, never a half-written one.
254
+ - **HTTP server**: binds `127.0.0.1` by default (override with
255
+ `COBRA4_HTTP_BIND=0.0.0.0`); response content-type is inferred from
256
+ return type (dict/list → JSON, str → text/plain, bytes → octet-stream);
257
+ handlers can return `(status, headers, body)` tuples; `Request` exposes
258
+ `.json()` and `.text()` body decoders.
259
+ - **Stdlib import hook caches** — `__pycache__/<name>.cobra4.pyc` keyed
260
+ on source mtime+size. Edit `.c4`, next import re-transpiles; otherwise
261
+ loads from cache.
262
+ - **Dispatch tracing** — set `COBRA4_TRACE_DISPATCH=1` to get a one-line
263
+ log of every smart-fn resolution. Makes the "smart" routing observable.
264
+
265
+ ### Operational env vars
266
+
267
+ | Var | What it does |
268
+ |---|---|
269
+ | `COBRA4_TRACE_DISPATCH=1` | Log every `SmartFn` resolution to stderr. |
270
+ | `COBRA4_HTTP_BIND=0.0.0.0` | Override the daemon HTTP bind address (default `127.0.0.1`). |
271
+ | `COBRA4_SSH_HOST_KEY_POLICY=auto` | Use paramiko `AutoAddPolicy` (default is `RejectPolicy`). |
272
+ | `COBRA4_DEPLOY_DRY_RUN=0` | Actually invoke deploy adapters (default is dry-run). |
273
+ | `COBRA4_LOG_FORMAT=json` | Switch `log()` from key=value to JSON-line. |
274
+ | `COBRA4_OTEL_EXPORT=1` | Forward log records to OTel (requires `cobra4[otel]`). |
275
+ | `COBRA4_SECRETS_BACKEND=env\|file\|vault\|aws-sm\|gcp-sm` | Pick a secrets backend. |
276
+ | `COBRA4_SECRETS_DIR=…` | Override the file-backend root (default `~/.cobra4/secrets/`). |
277
+ | `COBRA4_LAMBDA_ROLE=arn:…` | IAM role ARN for AWS Lambda deploys. |
278
+ | `COBRA4_SQL_URL=postgresql://…` | Default SQLAlchemy URL for the `sql` plugin. |
279
+ | `COBRA4_QUEUE_BACKEND=memory\|file\|sqs\|redis` | Default backend for `queue("name")`. |
280
+ | `COBRA4_FILE_QUEUE_DIR=…` | Where the FileQueue stores events. |
281
+ | `COBRA4_REDIS_URL=redis://…` | Connection URL for the Redis queue backend. |
282
+
283
+ ### "Make it real" pass (✅)
284
+
285
+ Replaces every stub and aspirational feature with a real implementation. After this pass, every
286
+ claim in this README has working code and regression tests.
287
+
288
+ - **Pattern matching completed**: `*rest` in lists, `**rest` in dicts,
289
+ `(a, b)` tuple destructure. Already shipped: OR-patterns, guards.
290
+ - **Local `.c4` modules**: `use mymodule` resolves `mymodule.c4` from
291
+ `sys.path` via [import_hook.py](cobra4/import_hook.py), with
292
+ mtime-keyed bytecode cache.
293
+ - **Parser error recovery**: `parse_collect_errors()` reports multiple
294
+ diagnostics in one shot.
295
+ - **Cloud adapters made real**: `gcp.run` builds Docker + `gcloud`,
296
+ `k8s` generates manifests + `kubectl apply`, `fly` calls `flyctl`,
297
+ `aws.lambda` packages with vendored runtime + boto3 create/update.
298
+ - **SQL plugin executes**: `configure(url)` or `COBRA4_SQL_URL` enables
299
+ real SQLAlchemy execution with `:name` parameters.
300
+ - **Queue backends real**: `InMemoryQueue`, `FileQueue` (durable),
301
+ `SQSQueue`, `RedisQueue`.
302
+ - **Test framework**: `c4 test` discovers/runs `tests/test_*.c4` with
303
+ pytest-style output, optional JUnit XML.
304
+ - **LSP completed**: go-to-definition, find-references,
305
+ document-symbols, completion (in addition to existing diagnostics + format + hover).
306
+ - **Source map column-precise**: tracebacks point to `line:col`.
307
+ - **REPL**: tab completion + history (`~/.cobra4/history`).
308
+ - **`c4 run --watch`**, **`c4 deps install --venv`**,
309
+ **`c4 plugin add NAME`**, **`c4 doc --html`**.
310
+ - **Stdlib expanded**: `http` (Session, retries, auth), `fs` (walk,
311
+ copy, copytree, move), `data` (group_by, aggregate, join, sort_by),
312
+ `time` (parse/fmt durations), `strings` (slugify, camel_to_snake),
313
+ `cli` (App builder), `test` (assertion DSL).
314
+
315
+ **End-to-end examples that exercise everything**:
316
+ - [09_log_analyzer.c4](examples/09_log_analyzer.c4) — log parsing →
317
+ per-status & per-path aggregations → JSON report.
318
+ - [10_webhook_router.c4](examples/10_webhook_router.c4) — HTTP server
319
+ with bearer auth, pattern-matched routing, real SQLite via the `sql`
320
+ plugin.
321
+
322
+ **176 tests, all green. Every example runs end-to-end.**
323
+
324
+ See `~/.claude/plans/voglio-realizzare-cobra4-un-curried-lark.md` for the
325
+ full roadmap (a planning file local to the author's machine; it is not shipped with this package).
326
+
327
+ ## Project layout
328
+
329
+ ```
330
+ cobra4/
331
+ cli.py CLI: run, build, fmt, check, repl
332
+ grammar.lark LALR(1) grammar
333
+ lexer.py Lark wrapper + bracket-aware postlex
334
+ parser.py Tree → AST transformer
335
+ ast_nodes.py AST dataclasses
336
+ resolver.py Scope check, lvalue validation
337
+ lowering.py Surface AST → core AST (M1: identity)
338
+ codegen.py Core AST → Python source
339
+ source_map.py Line-to-line mapping
340
+ runtime/
341
+ smart.py SmartFn / @smart / open dispatch (the heart)
342
+ io.py read / save with stdlib handlers
343
+ concurrency.py parallel_for
344
+ observe.py structured log
345
+ core.py ?., ??, every/on_event/serve/deploy registries
346
+ examples/ 01-05 end-to-end programs
347
+ tests/ lexer, parser, codegen, runtime, CLI, examples
348
+ ```
349
+
350
+ ## License
351
+
352
+ MIT.
cobra4-0.1.0/README.md ADDED
@@ -0,0 +1,307 @@
1
+ <img src="docs/assets/logo-128.png" align="right" width="128" alt="cobra4 logo">
2
+
3
+ # cobra4
4
+
5
+ A high-level, cloud-native language transpiled to Python.
6
+
7
+ cobra4 promotes patterns common in cloud automation, data pipelines, and
8
+ distributed jobs to first-class language constructs. A single line of
9
+ cobra4 often replaces a small Python program.
10
+
11
+ ```cobra4
12
+ # read auto-detects format & source
13
+ users = read("./data/users.csv")
14
+ config = read("https://api.example.com/config.json")
15
+ adults = read("s3://acme/people.parquet")
16
+
17
+ # parallel fan-out, no boilerplate
18
+ results = each url in urls in parallel(workers=10) { fetch(url) }
19
+
20
+ # scheduling as a block
21
+ every 5 minutes {
22
+ ingest()
23
+ }
24
+
25
+ # pattern matching
26
+ match resp.status {
27
+ case 200 { handle(resp.json) }
28
+ case _ { log.warn("unexpected", code=resp.status) }
29
+ }
30
+ ```
31
+
32
+ ## Mantra
33
+
34
+ 1. **Readability first** — no esoteric operators (no `|>`), English keywords.
35
+ 2. **One line = one program** — cloud/distributed patterns are syntax.
36
+ 3. **General purpose** — anything Python does, cobra4 does.
37
+ 4. **Extensible** — *libraries* (extend the runtime) and *language plugins*
38
+ (extend the parser/AST) are distinct, both first-class.
39
+
40
+ ## Quick start
41
+
42
+ ```bash
43
+ pip install -e .[aws,data,dev]
44
+
45
+ c4 run examples/03_etl.c4 # transpile + execute
46
+ c4 build examples/03_etl.c4 -o etl.py # transpile only
47
+ c4 fmt examples/03_etl.c4 # canonical format
48
+ c4 check examples/03_etl.c4 # parse + scope check
49
+ c4 repl # interactive
50
+ ```
51
+
52
+ ## The cobra4 mentality: smart functions
53
+
54
+ Built-in and stdlib functions are **open dispatchers**. Their behavior
55
+ depends on argument type, URI scheme, file extension, and MIME content
56
+ type — and any library can extend them at boot or at runtime:
57
+
58
+ ```python
59
+ # Python side (in a library)
60
+ from cobra4.runtime.io import read
61
+ import yaml
62
+ read.register(yaml.safe_load, type=str, scheme="file", ext="yml", name="local-yaml")
63
+ ```
64
+
65
+ User cobra4 code can opt in to the same behavior with the `@smart` decorator:
66
+
67
+ ```cobra4
68
+ @smart
69
+ fn process(target) { return target }
70
+ process.register(scheme="s3", fn=fn(t) { ... })
71
+ process.register(type=DataFrame, fn=fn(df) { ... })
72
+ ```
73
+
74
+ Specificity wins. Ties at the same priority raise `AmbiguousDispatch`
75
+ on the first call — no silent fallbacks. See [`cobra4/runtime/smart.py`](cobra4/runtime/smart.py).
76
+
77
+ ## What's shipped
78
+
79
+ ### M1 — Core MVP (✅)
80
+
81
+ End-to-end working pipeline with a small but real surface:
82
+
83
+ - **Syntax**: brace-based blocks, English keywords, `?.` and `??` only.
84
+ - **Statements**: `if`/`elif`/`else`, `while`, `for`, `each ... (in parallel)?`,
85
+ `every`, `on event from`, `match`/`case`, `try`/`catch`/`finally`,
86
+ `serve`, `deploy`, `use`, `fn`, `class`.
87
+ - **Expressions**: full Python-like precedence; `?.` safe-nav; `??` default;
88
+ string interpolation `"hello {name}"`; lambdas `fn(x) = ...` / `fn(x) { ... }`.
89
+ - **Smart dispatch core** (`runtime/smart.py`): `SmartFn`, `@smart`,
90
+ `.register(...)`, type/scheme/ext/MIME/predicate matching, ambiguity
91
+ detection, resolution caching.
92
+ - **`read` / `save`**: smart-dispatch IO. Stdlib handlers for `csv`, `json`,
93
+ `jsonl`, `txt`, `md`, `parquet` × `./` `file://` `https://` `s3://`.
94
+ - **Concurrency**: `each ... in parallel { ... }` → thread/process pool fan-out.
95
+ - **CLI**: `c4 run | build | fmt | check | repl`. `serve` / `doc` / `deps` /
96
+ `plugin` were stubbed in M1 (implemented in the later milestones below).
97
+ - **Source map**: line-to-line cobra4↔Python; tracebacks point back to source.
98
+ - **Test suite**: ~80 tests covering lexer, parser, codegen, runtime,
99
+ dispatcher, CLI, and end-to-end execution of every example.
100
+
101
+ ### M2 — Types & lint (✅)
102
+
103
+ - **Resolver** with nested function/class/block scopes; warnings for
104
+ undefined names and outer-scope shadowing.
105
+ - **Gradual type checker** ([typecheck.py](cobra4/typecheck.py)):
106
+ honors annotations, infers literal/binop types, warns on call-site /
107
+ return-type / default mismatches. Stays advisory — code still runs.
108
+ - **Dispatch static analysis** ([dispatch_analysis.py](cobra4/dispatch_analysis.py)):
109
+ flags two `read.register(...)` (or any SmartFn) calls with overlapping
110
+ dispatch keys at the same priority, which would likely raise `AmbiguousDispatch` at runtime.
111
+ - `c4 check` wires all three together and accepts the flags `--strict`, `--no-types`, and `--no-shadowing`.
112
+
113
+ ### M3 — Cloud primitives (✅)
114
+
115
+ - **`fleet`** ([runtime/fleet.py](cobra4/runtime/fleet.py)): TOML inventory
116
+ (groups, glob patterns), `Host` dataclass, `run(cmd, host=...)` dispatching
117
+ to local subprocess or system `ssh`, `fan_out` parallel helper.
118
+ - **`secrets`** ([runtime/secrets.py](cobra4/runtime/secrets.py)): pluggable
119
+ backends — `env`, `file`, `vault` (hvac), `aws-sm` (boto3), `gcp-sm`.
120
+ Selection via `COBRA4_SECRETS_BACKEND` or `[secrets]` in `cobra4.toml`.
121
+ - **`deploy`** ([runtime/deploy.py](cobra4/runtime/deploy.py)): adapter
122
+ builder pattern (`aws.lambda(region="x")`), `register_adapter()`, dry-run
123
+ by default (set `COBRA4_DEPLOY_DRY_RUN=0` to go live), `env_from(".env")`.
124
+
125
+ ### M4 — Daemon & event loop (✅)
126
+
127
+ - **`c4 serve FILE`**: imports the module to register `every` / `on event` /
128
+ `serve` callbacks, then runs the scheduler + event poller + HTTP servers
129
+ in dedicated threads until SIGINT.
130
+ - **HTTP**: `serve handler on :8080` boots `ThreadingHTTPServer`, request
131
+ → `Request(method, path, params, headers, body)`, JSON-encode return value.
132
+ - **Queues**: built-in `InMemoryQueue` with `.put` / `.poll`; the
133
+ `EventSource` protocol (`poll(timeout)`) lets users plug in SQS/RabbitMQ/Kafka.
134
+
135
+ ### M5 — Language plugins (✅)
136
+
137
+ - **Plugin contract** ([plugins/api.py](cobra4/plugins/api.py)):
138
+ `LanguagePlugin(name, transform_source, runtime_module, builtins, description)`.
139
+ - **Loader** ([plugins/loader.py](cobra4/plugins/loader.py)) scans
140
+ `lang use NAME` directives at the top of a file, auto-imports
141
+ `cobra4.plugins.builtin.<name>` or `cobra4_lang_<name>`, and pre-processes
142
+ the source before the main parser.
143
+ - **Reference plugin** ([plugins/builtin/sql.py](cobra4/plugins/builtin/sql.py)):
144
+ rewrites `sql { SELECT ... }` blocks to `sql_run("...")` runtime calls.
145
+ - `c4 plugin list` prints registered plugins.
146
+
147
+ ### Post-M5 expansions (✅)
148
+
149
+ **Wave 1 — syntax fills**:
150
+ - Slice indexing: `xs[a:b]`, `xs[:b]`, `xs[a:]`, `xs[a:b:c]`, `xs[:]`.
151
+ - `where` filter on `each` and `for` (comprehension-style).
152
+ - OR-pattern (`case 1 | 2 | 3`) and guard (`case x if cond`) in `match`.
153
+
154
+ **Wave 2 — tooling (M6)**:
155
+ - Multi-line REPL ([tools/repl.py](cobra4/tools/repl.py)) with bracket-aware continuation.
156
+ - Canonical formatter ([tools/fmt.py](cobra4/tools/fmt.py)) — re-emits cobra4 from the AST, idempotent.
157
+ - LSP server ([tools/lsp.py](cobra4/tools/lsp.py)) on stdio: diagnostics from
158
+ parser+resolver+typecheck, formatting, hover with type info. Run with `c4 lsp`.
159
+
160
+ **Wave 3 — more language plugins**:
161
+ - `lang use regex` ([plugins/builtin/regex.py](cobra4/plugins/builtin/regex.py)) — `p = re"[a-z]+"i` literals.
162
+ - `lang use yaml` ([plugins/builtin/yaml.py](cobra4/plugins/builtin/yaml.py)) — inline YAML literals via `yaml"""..."""`.
163
+
164
+ **Wave 4 — cloud hardening**:
165
+ - Paramiko SSH backend with key-agent + sftp, fallback to system `ssh`. Install via `cobra4[ssh]`.
166
+ - AWS Lambda packaging: deterministic zip with vendored cobra4 runtime, idempotent create-or-update.
167
+ - OpenTelemetry log export: set `COBRA4_OTEL_EXPORT=1` after `pip install cobra4[otel]`.
168
+
169
+ **Wave 5 — stdlib + packaging (M7)**:
170
+ - `c4 deps add | list | remove | install` — manages `[deps]` in `cobra4.toml`, calls pip for install.
171
+ - `c4 doc FILE` — extracts docstrings + signatures to markdown.
172
+ - **Stdlib written in cobra4 itself**: [cobra4/stdlib/http.c4](cobra4/stdlib/http.c4),
173
+ [cobra4/stdlib/json.c4](cobra4/stdlib/json.c4), [cobra4/stdlib/fs.c4](cobra4/stdlib/fs.c4).
174
+ Loaded via a custom Python import hook with `__pycache__/*.cobra4.pyc` mtime-based caching —
175
+ re-import is a single file read, not a re-transpile.
176
+
177
+ ### Hardening pass (post-review)
178
+
179
+ A code-review pass identified 14 issues; the ones with runtime impact were
180
+ fixed and locked in with regression tests in [test_review_fixes.py](tests/test_review_fixes.py):
181
+
182
+ - **SmartFn cache bypassed when any handler uses `when=`** — the (type,
183
+ scheme, ext, mime) cache key is unsafe in the presence of value-dependent
184
+ predicates. The dispatcher now detects this and re-resolves on every call;
185
+ cache stays active when no handler uses `when=`.
186
+ - **`?.` resolves dict keys** — `req?.params?.name` works whether `params`
187
+ is an object (`getattr`) or a dict/Mapping (`.get`). Missing attribute
188
+ returns `None` rather than raising, so `??` composes cleanly.
189
+ - **`c4 fmt` is plugin-aware** — `lang use NAME` directives and
190
+ plugin-specific blocks (`sql {...}`, `re"..."`, `yaml"""..."""`) are
191
+ preserved verbatim through formatting via the new
192
+ `LanguagePlugin.preserve_for_format` hook.
193
+ - **`c4 check` knows plugin builtins** — the resolver now accepts
194
+ `extra_builtins` from the active plugins, so `sql_run` etc. don't get
195
+ flagged as undefined.
196
+ - **`dispatch_analysis` flags semantic overlaps** — not just identical
197
+ keys: also a generic handler subsuming a specific one at the same priority
198
+ (D002), and `when=` predicates with no other constraints (D003, since
199
+ they invalidate the cache for the whole SmartFn).
200
+ - **`fleet.run` is `shell=False` by default** — explicit opt-in with
201
+ `shell=True` for shell features. Argv form (list) is the recommended
202
+ path. Removes injection surface.
203
+ - **paramiko uses `RejectPolicy` by default** — unknown host keys are
204
+ rejected. Override per-host with `host_key_policy="auto"` in the
205
+ inventory `extra` map, or globally via `COBRA4_SSH_HOST_KEY_POLICY=auto`.
206
+ - **`save()` is atomic** — writes to a temp file in the target's directory,
207
+ fsyncs, then `os.replace()`. A crash mid-write leaves either the old file or
208
+ the new file, never a half-written one.
209
+ - **HTTP server**: binds `127.0.0.1` by default (override with
210
+ `COBRA4_HTTP_BIND=0.0.0.0`); response content-type is inferred from
211
+ return type (dict/list → JSON, str → text/plain, bytes → octet-stream);
212
+ handlers can return `(status, headers, body)` tuples; `Request` exposes
213
+ `.json()` and `.text()` body decoders.
214
+ - **Stdlib import hook caches** — `__pycache__/<name>.cobra4.pyc` keyed
215
+ on source mtime+size. Edit `.c4`, next import re-transpiles; otherwise
216
+ loads from cache.
217
+ - **Dispatch tracing** — set `COBRA4_TRACE_DISPATCH=1` to get a one-line
218
+ log of every smart-fn resolution. Makes the "smart" routing observable.
219
+
220
+ ### Operational env vars
221
+
222
+ | Var | What it does |
223
+ |---|---|
224
+ | `COBRA4_TRACE_DISPATCH=1` | Log every `SmartFn` resolution to stderr. |
225
+ | `COBRA4_HTTP_BIND=0.0.0.0` | Override the daemon HTTP bind address (default `127.0.0.1`). |
226
+ | `COBRA4_SSH_HOST_KEY_POLICY=auto` | Use paramiko `AutoAddPolicy` (default is `RejectPolicy`). |
227
+ | `COBRA4_DEPLOY_DRY_RUN=0` | Actually invoke deploy adapters (default is dry-run). |
228
+ | `COBRA4_LOG_FORMAT=json` | Switch `log()` from key=value to JSON-line. |
229
+ | `COBRA4_OTEL_EXPORT=1` | Forward log records to OTel (requires `cobra4[otel]`). |
230
+ | `COBRA4_SECRETS_BACKEND=env\|file\|vault\|aws-sm\|gcp-sm` | Pick a secrets backend. |
231
+ | `COBRA4_SECRETS_DIR=…` | Override the file-backend root (default `~/.cobra4/secrets/`). |
232
+ | `COBRA4_LAMBDA_ROLE=arn:…` | IAM role ARN for AWS Lambda deploys. |
233
+ | `COBRA4_SQL_URL=postgresql://…` | Default SQLAlchemy URL for the `sql` plugin. |
234
+ | `COBRA4_QUEUE_BACKEND=memory\|file\|sqs\|redis` | Default backend for `queue("name")`. |
235
+ | `COBRA4_FILE_QUEUE_DIR=…` | Where the FileQueue stores events. |
236
+ | `COBRA4_REDIS_URL=redis://…` | Connection URL for the Redis queue backend. |
237
+
238
+ ### "Make it real" pass (✅)
239
+
240
+ Replaces every stub and aspirational feature with a working implementation. After this pass, every
241
+ claim in this README has working code and regression tests.
242
+
243
+ - **Pattern matching completed**: `*rest` in lists, `**rest` in dicts,
244
+ `(a, b)` tuple destructure. Already shipped: OR-patterns, guards.
245
+ - **Local `.c4` modules**: `use mymodule` resolves `mymodule.c4` from
246
+ `sys.path` via [import_hook.py](cobra4/import_hook.py), with
247
+ mtime-keyed bytecode cache.
248
+ - **Parser error recovery**: `parse_collect_errors()` reports multiple
249
+ diagnostics in one shot.
250
+ - **Cloud adapters made real**: `gcp.run` builds Docker + `gcloud`,
251
+ `k8s` generates manifests + `kubectl apply`, `fly` calls `flyctl`,
252
+ `aws.lambda` packages with vendored runtime + boto3 create/update.
253
+ - **SQL plugin executes**: `configure(url)` or `COBRA4_SQL_URL` enables
254
+ real SQLAlchemy execution with `:name` parameters.
255
+ - **Queue backends real**: `InMemoryQueue`, `FileQueue` (durable),
256
+ `SQSQueue`, `RedisQueue`.
257
+ - **Test framework**: `c4 test` discovers/runs `tests/test_*.c4` with
258
+ pytest-style output, optional JUnit XML.
259
+ - **LSP completed**: go-to-definition, find-references,
260
+ document-symbols, completion (in addition to existing diagnostics + format + hover).
261
+ - **Source map column-precise**: tracebacks point to `line:col`.
262
+ - **REPL**: tab completion + history (`~/.cobra4/history`).
263
+ - **`c4 run --watch`**, **`c4 deps install --venv`**,
264
+ **`c4 plugin add NAME`**, **`c4 doc --html`**.
265
+ - **Stdlib expanded**: `http` (Session, retries, auth), `fs` (walk,
266
+ copy, copytree, move), `data` (group_by, aggregate, join, sort_by),
267
+ `time` (parse/fmt durations), `strings` (slugify, camel_to_snake),
268
+ `cli` (App builder), `test` (assertion DSL).
269
+
270
+ **End-to-end examples that exercise everything**:
271
+ - [09_log_analyzer.c4](examples/09_log_analyzer.c4) — log parsing →
272
+ per-status & per-path aggregations → JSON report.
273
+ - [10_webhook_router.c4](examples/10_webhook_router.c4) — HTTP server
274
+ with bearer auth, pattern-matched routing, real SQLite via the `sql`
275
+ plugin.
276
+
277
+ **176 tests, all green. Every example runs end-to-end.**
278
+
279
+ The full roadmap lives in the author's local planning file
280
+ (`~/.claude/plans/voglio-realizzare-cobra4-un-curried-lark.md`), which is outside this package.
281
+
282
+ ## Project layout
283
+
284
+ ```
285
+ cobra4/
286
+ cli.py CLI: run, build, fmt, check, repl
287
+ grammar.lark LALR(1) grammar
288
+ lexer.py Lark wrapper + bracket-aware postlex
289
+ parser.py Tree → AST transformer
290
+ ast_nodes.py AST dataclasses
291
+ resolver.py Scope check, lvalue validation
292
+ lowering.py Surface AST → core AST (M1: identity)
293
+ codegen.py Core AST → Python source
294
+ source_map.py Line/column source mapping
295
+ runtime/
296
+ smart.py SmartFn / @smart / open dispatch (the heart)
297
+ io.py read / save with stdlib handlers
298
+ concurrency.py parallel_for
299
+ observe.py structured log
300
+ core.py ?., ??, every/on_event/serve/deploy registries
301
+ examples/ 01-05 end-to-end programs
302
+ tests/ lexer, parser, codegen, runtime, CLI, examples
303
+ ```
304
+
305
+ ## License
306
+
307
+ MIT.
@@ -0,0 +1,10 @@
1
+ """cobra4 — high-level cloud-native language transpiled to Python."""
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ # Activate the .c4 import finder so user code can do
6
+ # use my_module # → my_module.c4 anywhere on sys.path
7
+ # use mypkg.utils # → mypkg/utils.c4
8
+ # without relying on transpile-then-import workflows.
9
+ from cobra4 import import_hook as _c4_import_hook
10
+ _c4_import_hook.install()