skylar 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
skylar-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,88 @@
1
+ Metadata-Version: 2.4
2
+ Name: skylar
3
+ Version: 0.1.0
4
+ Summary: Skylar — local, sovereign, from-scratch LLMs. CLI + loader for the Skylar model family (COBOL specialist & more).
5
+ Author: A. Ivanovitch
6
+ License: Apache-2.0
7
+ Project-URL: Models, https://huggingface.co/Sophia-AI
8
+ Keywords: llm,cobol,code-generation,sovereign-ai,from-scratch
9
+ Classifier: License :: OSI Approved :: Apache Software License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
+ Requires-Python: >=3.9
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: torch>=2.1
15
+ Requires-Dist: transformers>=4.40
16
+ Requires-Dist: tokenizers>=0.15
17
+ Requires-Dist: huggingface_hub>=0.20
18
+ Requires-Dist: rich>=13.0
19
+ Provides-Extra: serve
20
+ Requires-Dist: fastapi>=0.100; extra == "serve"
21
+ Requires-Dist: uvicorn>=0.23; extra == "serve"
22
+ Requires-Dist: pydantic>=2.0; extra == "serve"
23
+
24
+ # skylar
25
+
26
+ Local, sovereign, **from-scratch** LLMs — a tiny runtime + CLI for the Skylar model family.
27
+ First release ships the **COBOL specialist** (`Sophia-AI/SkylarCobol-390M`): a 390M model that
28
+ beats 7B general code models on COBOL generation, small enough to run on a single GPU (or CPU).
29
+
30
+ ## Install
31
+
32
+ ```bash
33
+ pip install skylar
34
+ # optional HTTP server:
35
+ pip install "skylar[serve]"
36
+ ```
37
+
38
+ ## Use it — CLI
39
+
40
+ ```bash
41
+ # interactive chat (auto-downloads the model from HuggingFace the first time)
42
+ skylar chat
43
+
44
+ # one-shot completion
45
+ skylar generate --prompt "Scrivi un sottoprogramma COBOL che somma due interi in RESULT."
46
+
47
+ # point at a different model or a local checkpoint
48
+ skylar chat --model Sophia-AI/SkylarCobol-390M
49
+ skylar generate --model ./my-checkpoint --prompt "..."
50
+
51
+ # OpenAI-compatible server (needs the [serve] extra)
52
+ skylar serve --port 8000
53
+ # POST /generate {"prompt": "..."}
54
+ # POST /v1/chat/completions {"messages": [...]}
55
+ ```
56
+
57
+ By default the system prompt is *"Sei un esperto programmatore COBOL."* and decoding is greedy
58
+ (`--temperature 0.0`). Override with `--system` / `--temperature`.
59
+
60
+ ## Use it — Python
61
+
62
+ ```python
63
+ import skylar
64
+
65
+ m = skylar.load("Sophia-AI/SkylarCobol-390M") # or a local dir
66
+ print(m.generate("Complete a COBOL paragraph that stores the max of two numbers in RESULT."))
67
+
68
+ for delta in m.stream("..."): # streaming
69
+ print(delta, end="", flush=True)
70
+ ```
71
+
72
+ `skylar` also registers the architecture with 🤗 Transformers, so this works too:
73
+
74
+ ```python
75
+ import skylar # registers nano-transformer
76
+ from transformers import AutoModelForCausalLM
77
+ model = AutoModelForCausalLM.from_pretrained("Sophia-AI/SkylarCobol-390M")
78
+ ```
79
+
80
+ ## What's inside
81
+
82
+ The Skylar models use a custom decoder (`NanoTransformer`, Qwen3-style: RMSNorm + RoPE + GQA +
83
+ QK-Norm + SwiGLU), trained 100% from scratch (no third-party pretrained weights). This package
84
+ vendors the architecture so the published weights load anywhere — no private framework needed.
85
+
86
+ ## License
87
+
88
+ Apache-2.0. Models & code IP: A. Ivanovitch (Sophia AI).
skylar-0.1.0/README.md ADDED
@@ -0,0 +1,65 @@
1
+ # skylar
2
+
3
+ Local, sovereign, **from-scratch** LLMs — a tiny runtime + CLI for the Skylar model family.
4
+ First release ships the **COBOL specialist** (`Sophia-AI/SkylarCobol-390M`): a 390M model that
5
+ beats 7B general code models on COBOL generation, small enough to run on a single GPU (or CPU).
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ pip install skylar
11
+ # optional HTTP server:
12
+ pip install "skylar[serve]"
13
+ ```
14
+
15
+ ## Use it — CLI
16
+
17
+ ```bash
18
+ # interactive chat (auto-downloads the model from HuggingFace the first time)
19
+ skylar chat
20
+
21
+ # one-shot completion
22
+ skylar generate --prompt "Scrivi un sottoprogramma COBOL che somma due interi in RESULT."
23
+
24
+ # point at a different model or a local checkpoint
25
+ skylar chat --model Sophia-AI/SkylarCobol-390M
26
+ skylar generate --model ./my-checkpoint --prompt "..."
27
+
28
+ # OpenAI-compatible server (needs the [serve] extra)
29
+ skylar serve --port 8000
30
+ # POST /generate {"prompt": "..."}
31
+ # POST /v1/chat/completions {"messages": [...]}
32
+ ```
33
+
34
+ By default the system prompt is *"Sei un esperto programmatore COBOL."* and decoding is greedy
35
+ (`--temperature 0.0`). Override with `--system` / `--temperature`.
36
+
37
+ ## Use it — Python
38
+
39
+ ```python
40
+ import skylar
41
+
42
+ m = skylar.load("Sophia-AI/SkylarCobol-390M") # or a local dir
43
+ print(m.generate("Complete a COBOL paragraph that stores the max of two numbers in RESULT."))
44
+
45
+ for delta in m.stream("..."): # streaming
46
+ print(delta, end="", flush=True)
47
+ ```
48
+
49
+ `skylar` also registers the architecture with 🤗 Transformers, so this works too:
50
+
51
+ ```python
52
+ import skylar # registers nano-transformer
53
+ from transformers import AutoModelForCausalLM
54
+ model = AutoModelForCausalLM.from_pretrained("Sophia-AI/SkylarCobol-390M")
55
+ ```
56
+
57
+ ## What's inside
58
+
59
+ The Skylar models use a custom decoder (`NanoTransformer`, Qwen3-style: RMSNorm + RoPE + GQA +
60
+ QK-Norm + SwiGLU), trained 100% from scratch (no third-party pretrained weights). This package
61
+ vendors the architecture so the published weights load anywhere — no private framework needed.
62
+
63
+ ## License
64
+
65
+ Apache-2.0. Models & code IP: A. Ivanovitch (Sophia AI).
@@ -0,0 +1,38 @@
1
+ [build-system]
2
+ requires = ["setuptools>=64"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "skylar"
7
+ version = "0.1.0"
8
+ description = "Skylar — local, sovereign, from-scratch LLMs. CLI + loader for the Skylar model family (COBOL specialist & more)."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { text = "Apache-2.0" }
12
+ authors = [{ name = "A. Ivanovitch" }]
13
+ keywords = ["llm", "cobol", "code-generation", "sovereign-ai", "from-scratch"]
14
+ classifiers = [
15
+ "License :: OSI Approved :: Apache Software License",
16
+ "Programming Language :: Python :: 3",
17
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
18
+ ]
19
+ dependencies = [
20
+ "torch>=2.1",
21
+ "transformers>=4.40",
22
+ "tokenizers>=0.15",
23
+ "huggingface_hub>=0.20",
24
+ "rich>=13.0",
25
+ ]
26
+
27
+ [project.optional-dependencies]
28
+ serve = ["fastapi>=0.100", "uvicorn>=0.23", "pydantic>=2.0"]
29
+
30
+ [project.urls]
31
+ "Models" = "https://huggingface.co/Sophia-AI"
32
+
33
+ [project.scripts]
34
+ skylar = "skylar.cli:main"
35
+
36
+ [tool.setuptools.packages.find]
37
+ where = ["."]
38
+ include = ["skylar*"]
skylar-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,24 @@
1
+ """Skylar — local, sovereign, from-scratch LLMs (COBOL specialist & more)."""
2
+ from .core import Skylar, load, DEFAULT_MODEL, COBOL_SYSTEM
3
+ from .config import NanoTransformerConfig
4
+ from .decoder import NanoTransformer
5
+ from .chatml import encode_chatml
6
+
7
+ __version__ = "0.1.0"
8
+ __all__ = ["Skylar", "load", "NanoTransformer", "NanoTransformerConfig",
9
+ "encode_chatml", "DEFAULT_MODEL", "COBOL_SYSTEM", "__version__"]
10
+
11
+
12
def _register_auto():
    """Register the custom architecture with the Transformers ``Auto*`` factories.

    Maps the ``"nano-transformer"`` model type to ``NanoTransformerConfig`` and
    that config class to ``NanoTransformer``, so that
    ``AutoModelForCausalLM.from_pretrained(repo)`` works after ``import skylar``.

    Registration is best-effort: any failure (transformers missing, its
    internals changed, ...) is swallowed so that importing the package never
    breaks. The failure is logged at DEBUG level so it can still be diagnosed.
    """
    try:
        from transformers import AutoConfig, AutoModelForCausalLM
        AutoConfig.register("nano-transformer", NanoTransformerConfig)
        AutoModelForCausalLM.register(NanoTransformerConfig, NanoTransformer)
    except Exception:
        # Deliberately broad: a registration problem must never break the import.
        import logging
        logging.getLogger(__name__).debug(
            "could not register nano-transformer with transformers Auto classes",
            exc_info=True,
        )
22
+
23
+
24
+ _register_auto()
@@ -0,0 +1,112 @@
1
+ """
2
+ Chat dataset utilities for Supervised Fine-Tuning (SFT).
3
+
4
+ Supports:
5
+ - ChatML format (same as Qwen, Mistral, OpenAI)
6
+ The key insight: a "chat model" is just a base model fine-tuned on
7
+ structured conversations. The structure is enforced by special tokens.
8
+ """
9
+
10
+ import json
11
+
12
+
13
+ # ─────────────────────────────────────────────────────────────
14
+ # CHAT FORMAT (ChatML)
15
+ # ─────────────────────────────────────────────────────────────
16
+ #
17
+ # ChatML is the standard used by Qwen, Mistral, OpenAI, etc.
18
+ # It wraps each message in special tokens:
19
+ #
20
+ # <|im_start|>system
21
+ # You are a helpful assistant.<|im_end|>
22
+ # <|im_start|>user
23
+ # What is 2+2?<|im_end|>
24
+ # <|im_start|>assistant
25
+ # 2+2 equals 4.<|im_end|>
26
+ #
27
+ # During training, we only compute loss on the ASSISTANT tokens.
28
+ # The model learns: "given this conversation so far, what should
29
+ # the assistant say next?"
30
+ # ─────────────────────────────────────────────────────────────
31
+
32
def get_chatml_ids(tokenizer):
    """Return the token-id sequences for the ChatML start and end markers.

    Works with both special-token and sub-token tokenizers.

    Args:
        tokenizer: object exposing ``token_to_id(text)`` and
            ``encode(text, add_special_tokens=False).ids``.

    Returns:
        A pair ``(im_start_ids, im_end_ids)``. When both markers are single
        vocabulary tokens each element is a one-item list; otherwise both
        markers are encoded as ordinary text.
    """
    start_id = tokenizer.token_to_id("<|im_start|>")
    end_id = tokenizer.token_to_id("<|im_end|>")
    # Only take the single-token path when BOTH markers are in the vocabulary;
    # otherwise a missing "<|im_end|>" would leak a None into the id list.
    if start_id is not None and end_id is not None:
        return [start_id], [end_id]
    return (
        tokenizer.encode("<|im_start|>", add_special_tokens=False).ids,
        tokenizer.encode("<|im_end|>", add_special_tokens=False).ids,
    )
42
+
43
+
44
def create_loss_mask(messages, tokenizer):
    """Tokenize a ChatML conversation and build the matching label sequence.

    Works with both special-token and sub-token tokenizers.

    Args:
        messages: sequence of dicts with ``"role"`` and ``"content"`` keys.
        tokenizer: object exposing ``encode(text, add_special_tokens=False).ids``
            (and whatever ``get_chatml_ids`` needs).

    Returns:
        ``(token_ids, labels)``, two lists of equal length. ``labels`` repeats
        the token id for the content and end marker of assistant messages and
        holds ``-100`` everywhere else (start markers, role headers,
        non-assistant content, separators), so only assistant tokens are
        supervised.
    """
    ims_ids, ime_ids = get_chatml_ids(tokenizer)
    # The separator between messages is always the same: encode it once.
    sep_ids = tokenizer.encode("\n", add_special_tokens=False).ids
    ignore = -100  # presumably the loss function's ignore index -- confirm in the trainer

    all_token_ids = []
    all_labels = []
    last_index = len(messages) - 1

    for i, msg in enumerate(messages):
        is_assistant = msg["role"] == "assistant"

        # "<|im_start|>role\n" header: never supervised.
        role_ids = tokenizer.encode(msg["role"] + "\n", add_special_tokens=False).ids
        header_ids = [*ims_ids, *role_ids]
        all_token_ids.extend(header_ids)
        all_labels.extend([ignore] * len(header_ids))

        # Content followed by "<|im_end|>": supervised for assistant turns only.
        content_ids = tokenizer.encode(msg["content"], add_special_tokens=False).ids
        body_ids = [*content_ids, *ime_ids]
        all_token_ids.extend(body_ids)
        all_labels.extend(body_ids if is_assistant else [ignore] * len(body_ids))

        # Newline separator between messages (not after the last one).
        if i < last_index:
            all_token_ids.extend(sep_ids)
            all_labels.extend([ignore] * len(sep_ids))

    return all_token_ids, all_labels
80
+
81
+
82
def encode_chatml(messages, tokenizer, add_generation_prompt=False):
    """Encode a conversation as a flat list of ChatML token ids.

    Works with both special-token and sub-token tokenizers.

    Args:
        messages: sequence of dicts with ``"role"`` and ``"content"`` keys.
        tokenizer: object exposing ``encode(text, add_special_tokens=False).ids``
            (and whatever ``get_chatml_ids`` needs).
        add_generation_prompt: when true, append a newline followed by an open
            ``<|im_start|>assistant\\n`` header so the model continues as the
            assistant.

    Returns:
        The list of token ids. Messages are joined by an encoded newline.
    """
    ims_ids, ime_ids = get_chatml_ids(tokenizer)
    # The newline is reused for every separator and for the generation prompt.
    newline_ids = tokenizer.encode("\n", add_special_tokens=False).ids

    all_ids = []
    last_index = len(messages) - 1
    for i, msg in enumerate(messages):
        all_ids.extend(ims_ids)
        all_ids.extend(tokenizer.encode(msg["role"] + "\n", add_special_tokens=False).ids)
        all_ids.extend(tokenizer.encode(msg["content"], add_special_tokens=False).ids)
        all_ids.extend(ime_ids)
        if i < last_index:
            all_ids.extend(newline_ids)

    if add_generation_prompt:
        all_ids.extend(newline_ids)
        all_ids.extend(ims_ids)
        all_ids.extend(tokenizer.encode("assistant\n", add_special_tokens=False).ids)

    return all_ids
101
+
102
+
103
def load_dataset_jsonl(filepath):
    """Read a JSON Lines file and return its records as a list.

    Each line is stripped; blank lines are skipped and every remaining line is
    parsed with ``json.loads``. A one-line summary is printed to stdout.
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        stripped_lines = (raw.strip() for raw in handle)
        records = [json.loads(text) for text in stripped_lines if text]
    print(f" Loaded {len(records)} examples from {filepath}")
    return records
@@ -0,0 +1,171 @@
1
+ """`skylar` command-line interface: chat · generate · serve."""
2
+ import argparse
3
+ import sys
4
+
5
+ DEFAULT_MODEL = "Sophia-AI/SkylarCobol-390M"
6
+
7
+
8
def _add_common(sp):
    """Add the options shared by the subcommands to the parser ``sp``.

    Registers ``--model``, ``--device``, ``--system``, ``--max-new`` (stored as
    ``max_new``) and ``--temperature``, in that order.
    """
    shared_options = (
        ("--model", {"default": DEFAULT_MODEL,
                     "help": "HF repo id (es. Sophia-AI/SkylarCobol-390M) o cartella locale"}),
        ("--device", {"default": None, "help": "cuda | cpu (auto se omesso)"}),
        ("--system", {"default": None, "help": "system prompt (default: esperto COBOL)"}),
        ("--max-new", {"dest": "max_new", "type": int, "default": 512}),
        ("--temperature", {"type": float, "default": 0.0,
                           "help": "0.0 = greedy deterministico (default)"}),
    )
    for flag, options in shared_options:
        sp.add_argument(flag, **options)
16
+
17
+
18
def cmd_generate(args):
    """Handle ``skylar generate``: print one completion for ``args.prompt``.

    Uses ``args.system`` as the system prompt when given, otherwise
    ``COBOL_SYSTEM``.
    """
    from .core import Skylar, COBOL_SYSTEM

    model = Skylar.load(args.model, device=args.device)
    system_prompt = COBOL_SYSTEM if args.system is None else args.system
    completion = model.generate(
        args.prompt,
        system=system_prompt,
        max_new_tokens=args.max_new,
        temperature=args.temperature,
        seed=args.seed,
    )
    print(completion)
24
+
25
+
26
def cmd_chat(args):
    """Handle ``skylar chat``: an interactive prompt loop with streamed replies.

    Shows a banner (in a rich panel when rich can be imported, plain text
    otherwise), then reads prompts until EOF, Ctrl-C at the prompt, or one of
    ``exit`` / ``quit`` / ``:q``. Empty input is ignored.
    """
    from .core import Skylar, COBOL_SYSTEM

    # rich is optional here: fall back to plain printing if it is unusable.
    try:
        from rich.console import Console
        from rich.panel import Panel
        console = Console()
    except Exception:
        console = None

    print(f"Carico {args.model} ...", file=sys.stderr)
    sk = Skylar.load(args.model, device=args.device)
    system = COBOL_SYSTEM if args.system is None else args.system

    head = f"Skylar · {args.model}\ndevice: {sk.device} · system: {system!r}\n('exit' o Ctrl-D per uscire)"
    if console:
        console.print(Panel(head, title="skylar chat", border_style="cyan"))
    else:
        print(head)

    exit_words = ("exit", "quit", ":q")
    while True:
        try:
            user = input("\n\033[1m›\033[0m ").strip()
        except (EOFError, KeyboardInterrupt):
            # Finish the prompt line before leaving.
            print()
            return
        if user.lower() in exit_words:
            return
        if user:
            pieces = sk.stream(user, system=system, max_new_tokens=args.max_new,
                               temperature=args.temperature)
            for delta in pieces:
                sys.stdout.write(delta)
                sys.stdout.flush()
            print()
59
+
60
+
61
def cmd_cobol(args):
    """Handle ``skylar cobol``: complete a COBOL stub into a whole program.

    The stub is the bundled example (``--example``) or the contents of
    ``--stub-file``; with neither, a usage hint goes to stderr and the process
    exits with status 1. The completed program is printed to stdout. With
    ``--compile`` the result of ``syntax_ok`` is reported on stderr (``None``
    is reported as cobc not being installed).
    """
    from .core import Skylar
    from .cobol import EXAMPLE_STUB, complete_cobol, syntax_ok

    if args.example:
        stub = EXAMPLE_STUB
    elif args.stub_file:
        # Close the handle deterministically and read with a fixed encoding.
        with open(args.stub_file, encoding="utf-8") as stub_handle:
            stub = stub_handle.read()
    else:
        print("usa --example oppure --stub-file FILE", file=sys.stderr)
        sys.exit(1)

    print(f"Carico {args.model} ...", file=sys.stderr)
    sk = Skylar.load(args.model, device=args.device)
    prog = complete_cobol(sk, stub, max_new_tokens=args.max_new, temperature=args.temperature)
    print(prog)

    if args.compile:
        ok = syntax_ok(prog)
        if ok is None:
            print("\n[cobc non installato — salto il check sintassi]", file=sys.stderr)
        else:
            print(f"\n[cobc -fsyntax-only: {'OK, compila ✓' if ok else 'errori di sintassi ✗'}]",
                  file=sys.stderr)
82
+
83
+
84
def cmd_serve(args):
    """Handle ``skylar serve``: expose the model over HTTP with FastAPI/uvicorn.

    Requires the ``serve`` extra; if its packages cannot be imported a hint is
    printed to stderr and the process exits with status 1.

    Routes:
        GET  /health               -> status, model id and device.
        POST /generate             -> ``{"completion": ...}`` for a ``GenReq`` body.
        POST /v1/chat/completions  -> one ``chat.completion`` object built from
            the first system message (default ``COBOL_SYSTEM``) and the last
            user message; earlier turns are not passed to the model and
            ``finish_reason`` is always ``"stop"``.
    """
    try:
        import uvicorn
        from fastapi import FastAPI
        from pydantic import BaseModel
    except Exception:
        print("`skylar serve` richiede gli extra: pip install 'skylar[serve]'", file=sys.stderr)
        sys.exit(1)
    from .core import Skylar, COBOL_SYSTEM

    sk = Skylar.load(args.model, device=args.device)
    app = FastAPI(title="Skylar", version="0.1.0")

    class GenReq(BaseModel):
        # Request body of POST /generate; only ``prompt`` is mandatory.
        prompt: str
        system: str = COBOL_SYSTEM
        max_new_tokens: int = 512
        temperature: float = 0.0

    @app.get("/health")
    def health():
        return {"status": "ok", "model": args.model, "device": sk.device}

    @app.post("/generate")
    def generate(r: GenReq):
        return {"completion": sk.generate(r.prompt, system=r.system,
                                          max_new_tokens=r.max_new_tokens,
                                          temperature=r.temperature)}

    @app.post("/v1/chat/completions")
    def chat_completions(body: dict):
        # Clients may send explicit JSON nulls for optional fields; treat a
        # null exactly like an absent key instead of forwarding None.
        msgs = body.get("messages") or []
        max_tokens = body.get("max_tokens")
        temperature = body.get("temperature")
        system = next((m["content"] for m in msgs if m.get("role") == "system"), COBOL_SYSTEM)
        user = next((m["content"] for m in reversed(msgs) if m.get("role") == "user"), "")
        text = sk.generate(user, system=system,
                           max_new_tokens=512 if max_tokens is None else max_tokens,
                           temperature=0.0 if temperature is None else temperature)
        return {"object": "chat.completion", "model": args.model,
                "choices": [{"index": 0, "message": {"role": "assistant", "content": text},
                             "finish_reason": "stop"}]}

    print(f"Skylar serve su http://{args.host}:{args.port} (POST /generate, /v1/chat/completions)")
    uvicorn.run(app, host=args.host, port=args.port)
127
+
128
+
129
def main(argv=None):
    """Entry point of the ``skylar`` command.

    Builds the parser with the ``generate``, ``chat``, ``cobol`` and ``serve``
    subcommands, parses ``argv`` (``sys.argv`` when ``None``) and dispatches.
    ``--version`` prints the package version; with no subcommand the help text
    is printed.
    """
    parser = argparse.ArgumentParser(
        prog="skylar",
        description="Skylar — LLM locali e sovrani, from-scratch (specialista COBOL & altri).")
    parser.add_argument("--version", action="store_true", help="stampa la versione ed esci")
    subcommands = parser.add_subparsers(dest="cmd")

    # generate: one-shot completion
    generate_parser = subcommands.add_parser("generate", help="una risposta singola a un prompt")
    _add_common(generate_parser)
    generate_parser.add_argument("--prompt", required=True)
    generate_parser.add_argument("--seed", type=int, default=None)
    generate_parser.set_defaults(func=cmd_generate)

    # chat: interactive loop
    chat_parser = subcommands.add_parser("chat", help="REPL interattiva in streaming")
    _add_common(chat_parser)
    chat_parser.set_defaults(func=cmd_chat)

    # cobol: stub completion; overrides the shared --max-new default with 900
    cobol_parser = subcommands.add_parser(
        "cobol", help="completa uno stub COBOL in un programma intero")
    _add_common(cobol_parser)
    cobol_parser.add_argument("--stub-file", default=None,
                              help="file con uno stub COBOLEval-style")
    cobol_parser.add_argument("--example", action="store_true",
                              help="usa lo stub d'esempio incluso")
    cobol_parser.add_argument("--compile", action="store_true",
                              help="verifica la sintassi con GnuCOBOL")
    cobol_parser.set_defaults(func=cmd_cobol, max_new=900)

    # serve: HTTP server
    serve_parser = subcommands.add_parser(
        "serve", help="server HTTP (OpenAI-compatibile) — extra [serve]")
    _add_common(serve_parser)
    serve_parser.add_argument("--host", default="127.0.0.1")
    serve_parser.add_argument("--port", type=int, default=8000)
    serve_parser.set_defaults(func=cmd_serve)

    args = parser.parse_args(argv)
    if args.version:
        from . import __version__
        print(f"skylar {__version__}")
    elif getattr(args, "func", None):
        args.func(args)
    else:
        parser.print_help()


if __name__ == "__main__":
    main()
@@ -0,0 +1,115 @@
1
+ """COBOL-aware helpers for the SkylarCobol model.
2
+
3
+ The model is trained to *complete* a COBOLEval-style stub: given a fixed-format COBOL skeleton
4
+ (IDENTIFICATION/ENVIRONMENT/DATA/LINKAGE divisions + the task as comments, ending at
5
+ `WORKING-STORAGE SECTION.`), it emits the WORKING-STORAGE entries + PROCEDURE DIVISION as a
6
+ fenced ```cobol block. `complete_cobol()` wraps the prompt exactly like the training/eval harness
7
+ and reassembles a full, compilable program — so you get real COBOL, not a fragment.
8
+ """
9
+ import re
10
+
11
+ from .core import COBOL_SYSTEM
12
+
13
+ _EVAL_USER = (
14
+ "Complete the following COBOL subprogram. Output ONLY the WORKING-STORAGE SECTION "
15
+ "entries and the PROCEDURE DIVISION USING LINKED-ITEMS (do NOT repeat IDENTIFICATION/"
16
+ "ENVIRONMENT/DATA/LINKAGE), storing the answer in RESULT, ending with END PROGRAM.\n"
17
+ "```cobol\n{stub}\n```"
18
+ )
19
+
20
+
21
def extract_code_block(src):
    """Return the contents of the first fenced code block in ``src``.

    A fence is three backticks, optionally followed by ``cobol`` (any case).
    If the block is never closed -- e.g. generation stopped at the token limit
    -- everything after the opening fence is returned, so the fence line does
    not end up inside the program. With no usable fence, ``src`` is returned
    unchanged.
    """
    closed = re.search(r"```(?:cobol)?\s*\n(.*?)```", src, re.DOTALL | re.IGNORECASE)
    if closed:
        return closed.group(1)

    # Unterminated block: drop the opening fence, but only when some text
    # follows it (a lone trailing fence is not an opening fence).
    opening = re.search(r"```(?:cobol)?\s*\n", src, re.IGNORECASE)
    if opening:
        remainder = src[opening.end():]
        if remainder.strip():
            return remainder
    return src
24
+
25
+
26
def swap_sections(src):
    """Reorder a COBOL program so WORKING-STORAGE comes before LINKAGE.

    Lines are collected into four groups -- everything before the first
    recognised header, WORKING-STORAGE SECTION, LINKAGE SECTION and PROCEDURE
    DIVISION -- and joined back in that order. Headers are matched
    case-insensitively on the stripped line. Every PROCEDURE DIVISION header
    line is replaced by a fixed ``PROCEDURE DIVISION USING LINKED-ITEMS.`` line.
    """
    # NOTE(review): the leading whitespace of the replacement header below is
    # copied from this view; confirm it against the packaged file, since
    # fixed-format COBOL is column-sensitive.
    headers = (
        ("WORKING-STORAGE SECTION.", "ws"),
        ("LINKAGE SECTION.", "lk"),
        ("PROCEDURE DIVISION", "proc"),
    )
    groups = {"begin": [], "ws": [], "lk": [], "proc": []}
    active = "begin"
    for raw in src.split("\n"):
        head = raw.strip().upper()
        hit = next((key for prefix, key in headers if head.startswith(prefix)), None)
        if hit is not None:
            active = hit
            if hit == "proc":
                raw = " PROCEDURE DIVISION USING LINKED-ITEMS."
        groups[active].append(raw)
    ordered = groups["begin"] + groups["ws"] + groups["lk"] + groups["proc"]
    return "\n".join(ordered)
40
+
41
+
42
def _program_id(stub):
    """Return the name after the first ``PROGRAM-ID.`` in ``stub``.

    The match is case-insensitive and anchored at the start of a line; when no
    PROGRAM-ID is present, ``"SOLUTION"`` is returned.
    """
    found = re.search(r"(?im)^\s*PROGRAM-ID\.\s*([A-Za-z0-9-]+)", stub)
    if found is None:
        return "SOLUTION"
    return found.group(1)
45
+
46
+
47
def construct(stub, completion, entry_point):
    """Assemble a full COBOL program from a stub and a model completion.

    If the completion already contains an IDENTIFICATION DIVISION it is used
    as the whole program. Otherwise it is appended to ``stub`` (dropping a
    leading, duplicated ``WORKING-STORAGE SECTION.`` header) and the sections
    are reordered with ``swap_sections``. Any existing ``END PROGRAM`` lines
    are removed and a single one is appended, named after ``entry_point``
    upper-cased with underscores turned into hyphens.
    """
    # NOTE(review): the leading whitespace of the END PROGRAM line is copied
    # from this view; confirm it against the packaged file (fixed format is
    # column-sensitive).
    already_complete = "IDENTIFICATION DIVISION" in completion.upper()
    if already_complete:
        program = completion
    else:
        body = completion
        if body.strip().startswith("WORKING-STORAGE SECTION."):
            # The stub already ends with this header; avoid repeating it.
            body = body.replace("WORKING-STORAGE SECTION.", "", 1)
        program = swap_sections(f"{stub}\n{body}")

    program_name = entry_point.upper().replace("_", "-")
    without_terminator = re.sub(r"(?im)^[ \t]*END[ \t]+PROGRAM\b.*$", "", program).rstrip()
    return f"{without_terminator}\n END PROGRAM {program_name}.\n"
60
+
61
+
62
def complete_cobol(sk, stub, entry_point=None, max_new_tokens=900, temperature=0.0):
    """Return a full, reassembled COBOL program for a COBOLEval-style stub.

    The stub is wrapped in the ``_EVAL_USER`` prompt and sent to
    ``sk.generate`` with the COBOL system prompt; the fenced code is extracted
    from the reply and merged with the stub by ``construct``. The program name
    is ``entry_point`` when given, otherwise the stub's PROGRAM-ID.
    """
    program_name = entry_point if entry_point else _program_id(stub)
    prompt = _EVAL_USER.format(stub=stub)
    reply = sk.generate(
        prompt,
        system=COBOL_SYSTEM,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
    )
    code = extract_code_block(reply)
    return construct(stub, code, program_name)
68
+
69
+
70
def syntax_ok(program_text):
    """Best-effort check: does GnuCOBOL accept the program syntactically?

    The program is written to a temporary ``prog.cbl`` and checked with
    ``cobc -fsyntax-only -fformat=fixed -w`` (fixed format, because COBOLEval
    programs are column-sensitive). The ``COBC`` environment variable can
    point at a compiler that is not on PATH.

    Returns:
        ``True`` if cobc exits with status 0, ``False`` if it does not, and
        ``None`` if no cobc executable can be found.
    """
    import os
    import shutil
    import subprocess
    import tempfile

    cobc = os.environ.get("COBC", "cobc")
    if not (os.path.isfile(cobc) or shutil.which(cobc)):
        return None
    with tempfile.TemporaryDirectory() as workdir:
        source_path = os.path.join(workdir, "prog.cbl")
        # Close (and therefore flush) the file before cobc reads it; UTF-8 so
        # non-ASCII text in the program cannot fail on the locale encoding.
        with open(source_path, "w", encoding="utf-8") as source_file:
            source_file.write(program_text)
        result = subprocess.run(
            [cobc, "-fsyntax-only", "-fformat=fixed", "-w", source_path],
            capture_output=True, text=True)
    return result.returncode == 0
85
+
86
+
87
+ # sample stub for `skylar cobol --example` — a task the model handles well (increment a list).
88
+ # (COBOLEval-style fixed format; the model emits WORKING-STORAGE + PROCEDURE, we reassemble.)
89
+ EXAMPLE_STUB = """\
90
+ IDENTIFICATION DIVISION.
91
+ PROGRAM-ID. INCR-LIST.
92
+
93
+ ENVIRONMENT DIVISION.
94
+
95
+ INPUT-OUTPUT SECTION.
96
+
97
+ DATA DIVISION.
98
+
99
+ LINKAGE SECTION.
100
+
101
+ 01 LINKED-ITEMS.
102
+ 05 L-L OCCURS 3 TIMES INDEXED BY NI PIC S9(10).
103
+ 05 RESULT OCCURS 100 TIMES INDEXED BY NJ PIC S9(10).
104
+
105
+ * Return list with elements incremented by 1.
106
+ * >>> incr_list([1, 2, 3])
107
+ * [2, 3, 4]
108
+ * >>> incr_list([5, 3, 5])
109
+ * [6, 4, 6]
110
+
111
+ * Complete the WORKING-STORAGE SECTION and the PROCEDURE DIVISION
112
+ * Store the result in the RESULT variable and mark the end of your program with END PROGRAM
113
+
114
+ WORKING-STORAGE SECTION.
115
+ """