gotoken 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,341 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 4
4
+
5
+ [[package]]
6
+ name = "aho-corasick"
7
+ version = "1.1.4"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
10
+ dependencies = [
11
+ "memchr",
12
+ ]
13
+
14
+ [[package]]
15
+ name = "autocfg"
16
+ version = "1.5.1"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53"
19
+
20
+ [[package]]
21
+ name = "cfg-if"
22
+ version = "1.0.4"
23
+ source = "registry+https://github.com/rust-lang/crates.io-index"
24
+ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
25
+
26
+ [[package]]
27
+ name = "crossbeam-deque"
28
+ version = "0.8.6"
29
+ source = "registry+https://github.com/rust-lang/crates.io-index"
30
+ checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
31
+ dependencies = [
32
+ "crossbeam-epoch",
33
+ "crossbeam-utils",
34
+ ]
35
+
36
+ [[package]]
37
+ name = "crossbeam-epoch"
38
+ version = "0.9.18"
39
+ source = "registry+https://github.com/rust-lang/crates.io-index"
40
+ checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
41
+ dependencies = [
42
+ "crossbeam-utils",
43
+ ]
44
+
45
+ [[package]]
46
+ name = "crossbeam-utils"
47
+ version = "0.8.21"
48
+ source = "registry+https://github.com/rust-lang/crates.io-index"
49
+ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
50
+
51
+ [[package]]
52
+ name = "either"
53
+ version = "1.16.0"
54
+ source = "registry+https://github.com/rust-lang/crates.io-index"
55
+ checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
56
+
57
+ [[package]]
58
+ name = "gotoken"
59
+ version = "0.1.0"
60
+ dependencies = [
61
+ "phf",
62
+ "pyo3",
63
+ "rayon",
64
+ "regex",
65
+ ]
66
+
67
+ [[package]]
68
+ name = "heck"
69
+ version = "0.5.0"
70
+ source = "registry+https://github.com/rust-lang/crates.io-index"
71
+ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
72
+
73
+ [[package]]
74
+ name = "indoc"
75
+ version = "2.0.7"
76
+ source = "registry+https://github.com/rust-lang/crates.io-index"
77
+ checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
78
+ dependencies = [
79
+ "rustversion",
80
+ ]
81
+
82
+ [[package]]
83
+ name = "libc"
84
+ version = "0.2.186"
85
+ source = "registry+https://github.com/rust-lang/crates.io-index"
86
+ checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
87
+
88
+ [[package]]
89
+ name = "memchr"
90
+ version = "2.8.2"
91
+ source = "registry+https://github.com/rust-lang/crates.io-index"
92
+ checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4"
93
+
94
+ [[package]]
95
+ name = "memoffset"
96
+ version = "0.9.1"
97
+ source = "registry+https://github.com/rust-lang/crates.io-index"
98
+ checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
99
+ dependencies = [
100
+ "autocfg",
101
+ ]
102
+
103
+ [[package]]
104
+ name = "once_cell"
105
+ version = "1.21.4"
106
+ source = "registry+https://github.com/rust-lang/crates.io-index"
107
+ checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
108
+
109
+ [[package]]
110
+ name = "phf"
111
+ version = "0.11.3"
112
+ source = "registry+https://github.com/rust-lang/crates.io-index"
113
+ checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
114
+ dependencies = [
115
+ "phf_macros",
116
+ "phf_shared",
117
+ ]
118
+
119
+ [[package]]
120
+ name = "phf_generator"
121
+ version = "0.11.3"
122
+ source = "registry+https://github.com/rust-lang/crates.io-index"
123
+ checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
124
+ dependencies = [
125
+ "phf_shared",
126
+ "rand",
127
+ ]
128
+
129
+ [[package]]
130
+ name = "phf_macros"
131
+ version = "0.11.3"
132
+ source = "registry+https://github.com/rust-lang/crates.io-index"
133
+ checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216"
134
+ dependencies = [
135
+ "phf_generator",
136
+ "phf_shared",
137
+ "proc-macro2",
138
+ "quote",
139
+ "syn",
140
+ ]
141
+
142
+ [[package]]
143
+ name = "phf_shared"
144
+ version = "0.11.3"
145
+ source = "registry+https://github.com/rust-lang/crates.io-index"
146
+ checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
147
+ dependencies = [
148
+ "siphasher",
149
+ ]
150
+
151
+ [[package]]
152
+ name = "portable-atomic"
153
+ version = "1.13.1"
154
+ source = "registry+https://github.com/rust-lang/crates.io-index"
155
+ checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
156
+
157
+ [[package]]
158
+ name = "proc-macro2"
159
+ version = "1.0.106"
160
+ source = "registry+https://github.com/rust-lang/crates.io-index"
161
+ checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
162
+ dependencies = [
163
+ "unicode-ident",
164
+ ]
165
+
166
+ [[package]]
167
+ name = "pyo3"
168
+ version = "0.22.6"
169
+ source = "registry+https://github.com/rust-lang/crates.io-index"
170
+ checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884"
171
+ dependencies = [
172
+ "cfg-if",
173
+ "indoc",
174
+ "libc",
175
+ "memoffset",
176
+ "once_cell",
177
+ "portable-atomic",
178
+ "pyo3-build-config",
179
+ "pyo3-ffi",
180
+ "pyo3-macros",
181
+ "unindent",
182
+ ]
183
+
184
+ [[package]]
185
+ name = "pyo3-build-config"
186
+ version = "0.22.6"
187
+ source = "registry+https://github.com/rust-lang/crates.io-index"
188
+ checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38"
189
+ dependencies = [
190
+ "once_cell",
191
+ "target-lexicon",
192
+ ]
193
+
194
+ [[package]]
195
+ name = "pyo3-ffi"
196
+ version = "0.22.6"
197
+ source = "registry+https://github.com/rust-lang/crates.io-index"
198
+ checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636"
199
+ dependencies = [
200
+ "libc",
201
+ "pyo3-build-config",
202
+ ]
203
+
204
+ [[package]]
205
+ name = "pyo3-macros"
206
+ version = "0.22.6"
207
+ source = "registry+https://github.com/rust-lang/crates.io-index"
208
+ checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453"
209
+ dependencies = [
210
+ "proc-macro2",
211
+ "pyo3-macros-backend",
212
+ "quote",
213
+ "syn",
214
+ ]
215
+
216
+ [[package]]
217
+ name = "pyo3-macros-backend"
218
+ version = "0.22.6"
219
+ source = "registry+https://github.com/rust-lang/crates.io-index"
220
+ checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe"
221
+ dependencies = [
222
+ "heck",
223
+ "proc-macro2",
224
+ "pyo3-build-config",
225
+ "quote",
226
+ "syn",
227
+ ]
228
+
229
+ [[package]]
230
+ name = "quote"
231
+ version = "1.0.45"
232
+ source = "registry+https://github.com/rust-lang/crates.io-index"
233
+ checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
234
+ dependencies = [
235
+ "proc-macro2",
236
+ ]
237
+
238
+ [[package]]
239
+ name = "rand"
240
+ version = "0.8.6"
241
+ source = "registry+https://github.com/rust-lang/crates.io-index"
242
+ checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a"
243
+ dependencies = [
244
+ "rand_core",
245
+ ]
246
+
247
+ [[package]]
248
+ name = "rand_core"
249
+ version = "0.6.4"
250
+ source = "registry+https://github.com/rust-lang/crates.io-index"
251
+ checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
252
+
253
+ [[package]]
254
+ name = "rayon"
255
+ version = "1.12.0"
256
+ source = "registry+https://github.com/rust-lang/crates.io-index"
257
+ checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d"
258
+ dependencies = [
259
+ "either",
260
+ "rayon-core",
261
+ ]
262
+
263
+ [[package]]
264
+ name = "rayon-core"
265
+ version = "1.13.0"
266
+ source = "registry+https://github.com/rust-lang/crates.io-index"
267
+ checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
268
+ dependencies = [
269
+ "crossbeam-deque",
270
+ "crossbeam-utils",
271
+ ]
272
+
273
+ [[package]]
274
+ name = "regex"
275
+ version = "1.12.4"
276
+ source = "registry+https://github.com/rust-lang/crates.io-index"
277
+ checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba"
278
+ dependencies = [
279
+ "aho-corasick",
280
+ "memchr",
281
+ "regex-automata",
282
+ "regex-syntax",
283
+ ]
284
+
285
+ [[package]]
286
+ name = "regex-automata"
287
+ version = "0.4.14"
288
+ source = "registry+https://github.com/rust-lang/crates.io-index"
289
+ checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
290
+ dependencies = [
291
+ "aho-corasick",
292
+ "memchr",
293
+ "regex-syntax",
294
+ ]
295
+
296
+ [[package]]
297
+ name = "regex-syntax"
298
+ version = "0.8.11"
299
+ source = "registry+https://github.com/rust-lang/crates.io-index"
300
+ checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4"
301
+
302
+ [[package]]
303
+ name = "rustversion"
304
+ version = "1.0.22"
305
+ source = "registry+https://github.com/rust-lang/crates.io-index"
306
+ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
307
+
308
+ [[package]]
309
+ name = "siphasher"
310
+ version = "1.0.3"
311
+ source = "registry+https://github.com/rust-lang/crates.io-index"
312
+ checksum = "8ee5873ec9cce0195efcb7a4e9507a04cd49aec9c83d0389df45b1ef7ba2e649"
313
+
314
+ [[package]]
315
+ name = "syn"
316
+ version = "2.0.117"
317
+ source = "registry+https://github.com/rust-lang/crates.io-index"
318
+ checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
319
+ dependencies = [
320
+ "proc-macro2",
321
+ "quote",
322
+ "unicode-ident",
323
+ ]
324
+
325
+ [[package]]
326
+ name = "target-lexicon"
327
+ version = "0.12.16"
328
+ source = "registry+https://github.com/rust-lang/crates.io-index"
329
+ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
330
+
331
+ [[package]]
332
+ name = "unicode-ident"
333
+ version = "1.0.24"
334
+ source = "registry+https://github.com/rust-lang/crates.io-index"
335
+ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
336
+
337
+ [[package]]
338
+ name = "unindent"
339
+ version = "0.2.4"
340
+ source = "registry+https://github.com/rust-lang/crates.io-index"
341
+ checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
@@ -0,0 +1,31 @@
1
+ [package]
2
+ name = "gotoken"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+ description = "Syntax-aware tokenizer for Bash and formal languages"
6
+ license = "MIT"
7
+
[lib]
name = "gotoken"
# The manifest key is the singular `crate-type`; a `crate-types` key is not
# recognised by Cargo and would leave the library at its default crate type.
crate-type = ["rlib", "cdylib"]
11
+
12
+ [features]
13
+ default = ["rayon"]
14
+ rayon = ["dep:rayon"]
15
+ python = ["dep:pyo3"]
16
+
17
+ [dependencies]
18
+ phf = { version = "0.11", features = ["macros"] }
19
+ regex = "1.10"
20
+ rayon = { version = "1.10", optional = true }
21
+ # 0.22.x supporta Python 3.13
22
+ pyo3 = { version = "0.22", features = ["extension-module"], optional = true }
23
+
24
+ [dev-dependencies]
25
+
26
+ [profile.release]
27
+ opt-level = 3
28
+ lto = "fat"
29
+ codegen-units = 1
30
+ panic = "abort"
31
+ strip = "symbols"
gotoken-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: gotoken
3
+ Version: 0.1.0
4
+ Classifier: Programming Language :: Rust
5
+ Classifier: Programming Language :: Python :: Implementation :: CPython
6
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
7
+ Summary: Syntax-aware Bash tokenizer — Rust core, Python bindings
8
+ License: MIT
9
+ Requires-Python: >=3.9
@@ -0,0 +1,199 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Applies four fixes to src/lexer.rs:
4
+ 1. Operator pattern: || and |& must come before | in the alternation
5
+ 2. try_dquote now also reports UnclosedQuote (same return type as try_squote)
6
+ 3. The main loop dispatches " to try_dquote and ' to try_squote
+ 4. The duplicate unclosed-" check is removed from try_squote
7
+
8
+ Run from ~/gotoken/: python3 patch_lexer2.py
9
+ """
10
+
11
+ path = "src/lexer.rs"
12
+ with open(path, "r") as f:
13
+ src = f.read()
14
+
15
+ # ── Fix 1: regex operator — || e |& prima di | ─────────────────────────────
16
+ OLD_REGEX = (
17
+ r' operator: Regex::new(\n'
18
+ r' r"^(?:2>&1|&>>|&>|>>=|<<=|<<<|<<-|<<|>>|>\||\|&|&&|\|\|;;|;;|;|2>>|2>|<>|>>|>|<|&|\|)"\n'
19
+ r' )?,'
20
+ )
21
+ NEW_REGEX = (
22
+ r' operator: Regex::new(\n'
23
+ r' r"^(?:2>&1|&>>|&>|>>=|<<=|<<<|<<-|<<|>>|>\||&&|\|\||;;|\|&|;|2>>|2>|<>|>|<|&|\|)"\n'
24
+ r' )?,'
25
+ )
26
+
27
+ # Literal-string form of the old pattern; not referenced by the code below, which reads the pattern from the file instead
28
+ OLD_REGEX_LIT = ''' operator: Regex::new(
29
+ r"^(?:2>&1|&>>|&>|>>=|<<=|<<<|<<-|<<|>>|>\\|\\|&|&&|\\|\\|;;|;;|;|2>>|2>|<>|>>|>|<|&|\\|)"
30
+ )?,'''
31
+
32
+ # Leggiamo la riga esatta dal file
33
+ import re
# Fix 1: locate the raw-string pattern passed to `operator: Regex::new(` and
# swap it for an alternation in which || and |& are tried before a lone |.
op_match = re.search(r'operator: Regex::new\(\s*r"([^"]+)"', src)
if op_match:
    old_pattern = op_match.group(1)
    print(f"Pattern operatore trovato: {old_pattern}")
    # New alternation: || and |& come before the single |
    new_pattern = r"^(?:2>&1|&>>|&>|>>=|<<=|<<<|<<-|<<|>>|>\||&&|\|\||;;|\|&|;|2>>|2>|<>|>|<|&|\|)"
    # Splice at the matched span so that only the operator pattern is rewritten,
    # even if an identical raw string occurs earlier in the file.
    src = src[:op_match.start(1)] + new_pattern + src[op_match.end(1):]
    print("✓ Fix 1 applicato: priorità || e |& corretta nel pattern operator")
else:
    print("WARN: pattern operator non trovato, Fix 1 saltato")
48
+
49
+ # ── Fix 2: try_dquote ora ritorna Option<Result<Token>> ────────────────────
50
+
51
+ OLD_DQUOTE = ''' /// Double-quoted string. Returns `None` if no opening `"` is present.
52
+ /// If `"` is present but unclosed the regex simply won\'t match, so we
53
+ /// detect it here and return an error via the `ZeroLengthFallback`
54
+ /// guard path — actually we surface it as `UnclosedQuote`.
55
+ fn try_dquote(&self, tail: &str, offset: usize) -> Option<Token> {
56
+ if !tail.starts_with(\'"\') {
57
+ return None;
58
+ }
59
+ // If the regex matches we have a closed string.
60
+ if let Some(m) = self.patterns.dquote.find(tail) {
61
+ let text = &tail[m.start()..m.end()];
62
+ return Some(Token::new(text, TokenKind::StringLiteral, offset));
63
+ }
64
+ // `"` found but no closing quote — we return a sentinel token whose
65
+ // `kind` is set to a special value; the caller checks this and
66
+ // converts it to an error via `transpose()`.
67
+ // We encode the error as a `ByteFallback` with len 0 as a signal —
68
+ // the caller sees `ZeroLengthFallback` ... actually cleaner to use
69
+ // a dedicated approach: return the error inline.
70
+ // We exploit the Option<Result<Token>> path: NOT this function.
71
+ // This function stays Option<Token>; unclosed " is caught below.
72
+ None
73
+ }'''
74
+
75
+ NEW_DQUOTE = ''' /// Double-quoted string. Returns `Option<Result<Token>>` so that
76
+ /// `UnclosedQuote` can bubble up when the closing `"` is missing.
77
+ fn try_dquote(
78
+ &self,
79
+ tail: &str,
80
+ offset: usize,
81
+ ) -> Option<std::result::Result<Token, TokenizerError>> {
82
+ if !tail.starts_with(\'"\') {
83
+ return None;
84
+ }
85
+ if let Some(m) = self.patterns.dquote.find(tail) {
86
+ let text = &tail[m.start()..m.end()];
87
+ return Some(Ok(Token::new(text, TokenKind::StringLiteral, offset)));
88
+ }
89
+ // Opening `"` found but no closing quote.
90
+ Some(Err(TokenizerError::UnclosedQuote {
91
+ quote_char: \'"\',
92
+ opened_at: offset,
93
+ }))
94
+ }'''
95
+
# Fix 2: replace try_dquote with the version that returns Option<Result<Token>>.
if OLD_DQUOTE in src:
    src = src.replace(OLD_DQUOTE, NEW_DQUOTE, 1)
    print("✓ Fix 2 applicato: try_dquote ora ritorna Option<Result<Token>>")
else:
    print("WARN: corpo try_dquote non trovato esattamente — applico fix alternativo")
    # Fallback: find the old signature, then walk forward to the brace that
    # closes the function.
    header = re.search(
        r'fn try_dquote\(&self, tail: &str, offset: usize\) -> Option<Token> \{',
        src,
    )
    body_end = None
    if header:
        # The function body contains nested `{ ... }` blocks, so a pattern such
        # as `[^}]+\}` would stop at the first inner `}` and leave the rest of
        # the old body behind. Track brace depth instead.
        # NOTE(review): braces inside string/char literals or comments are
        # counted as well; confirm the target function contains none.
        depth = 0
        for pos in range(header.end() - 1, len(src)):
            if src[pos] == "{":
                depth += 1
            elif src[pos] == "}":
                depth -= 1
                if depth == 0:
                    body_end = pos + 1
                    break
    if body_end is None:
        # Do not claim success when nothing was replaced.
        print("WARN: vecchio try_dquote non trovato, Fix 2 saltato")
    else:
        # The text is spliced in starting at `fn`, so the first line carries no
        # indent of its own; the remaining lines assume the method sits one
        # level (4 spaces) inside its impl block.
        replacement = '''fn try_dquote(
        &self,
        tail: &str,
        offset: usize,
    ) -> Option<std::result::Result<Token, TokenizerError>> {
        if !tail.starts_with(\'"\') {
            return None;
        }
        if let Some(m) = self.patterns.dquote.find(tail) {
            let text = &tail[m.start()..m.end()];
            return Some(Ok(Token::new(text, TokenKind::StringLiteral, offset)));
        }
        Some(Err(TokenizerError::UnclosedQuote {
            quote_char: \'"\',
            opened_at: offset,
        }))
    }'''
        src = src[:header.start()] + replacement + src[body_end:]
        print("✓ Fix 2 applicato via regex fallback")
126
+
127
+ # ── Fix 3: loop principale — smista " a try_dquote, ' a try_squote ─────────
128
+
129
+ OLD_LOOP = ''' } else if tail.starts_with(\'"\') || tail.starts_with(\'\\'\') {
130
+ match self.try_squote(tail, cursor, input) {
131
+ Some(Ok(t)) => t,
132
+ Some(Err(e)) => return Err(e),
133
+ None => self.byte_fallback(tail, cursor),
134
+ }'''
135
+
136
+ NEW_LOOP = ''' } else if tail.starts_with(\'"\') {
137
+ match self.try_dquote(tail, cursor) {
138
+ Some(Ok(t)) => t,
139
+ Some(Err(e)) => return Err(e),
140
+ None => self.byte_fallback(tail, cursor),
141
+ }
142
+ } else if tail.starts_with(\'\\'\') {
143
+ match self.try_squote(tail, cursor, input) {
144
+ Some(Ok(t)) => t,
145
+ Some(Err(e)) => return Err(e),
146
+ None => self.byte_fallback(tail, cursor),
147
+ }'''
148
+
# Fix 3: split the combined quote branch of the main loop so that " goes to
# try_dquote and ' goes to try_squote.
if OLD_LOOP in src:
    src = src.replace(OLD_LOOP, NEW_LOOP, 1)
    print("✓ Fix 3 applicato: loop smista \" a try_dquote e ' a try_squote")
else:
    print("WARN: pattern loop non trovato esattamente")
    # Regex fallback. The second starts_with argument is the Rust char literal
    # '\'' (an escaped single quote), which the regex spells as '\\''.
    loop_re = re.compile(
        r"} else if tail\.starts_with\('\"'\) \|\| tail\.starts_with\('\\''\) \{"
        r".*?match self\.try_squote\(tail, cursor, input\) \{"
        r".*?None\s*=> self\.byte_fallback\(tail, cursor\),\s*\}",
        re.DOTALL,
    )
    # Inserted starting at the matched `}`; the continuation lines assume the
    # branch sits at a 12-space indent (indentation is cosmetic for Rust).
    dispatch = """} else if tail.starts_with('"') {
                match self.try_dquote(tail, cursor) {
                    Some(Ok(t)) => t,
                    Some(Err(e)) => return Err(e),
                    None => self.byte_fallback(tail, cursor),
                }
            } else if tail.starts_with('\\'') {
                match self.try_squote(tail, cursor, input) {
                    Some(Ok(t)) => t,
                    Some(Err(e)) => return Err(e),
                    None => self.byte_fallback(tail, cursor),
                }"""
    # A callable replacement is inserted verbatim, with no template-escape
    # processing of the backslash in '\''.
    src, replaced = loop_re.subn(lambda _m: dispatch, src, count=1)
    if replaced:
        print("✓ Fix 3 applicato via regex fallback")
    else:
        # Do not claim success when nothing was replaced.
        print("WARN: loop di smistamento non trovato, Fix 3 saltato")
174
+
175
+ # ── Fix 4: rimuovi il check " da try_squote (ora lo gestisce try_dquote) ───
176
+
177
+ OLD_SQUOTE_DQUOTE_CHECK = ''' if tail.starts_with(\'"\') {
178
+ // Double-quote unclosed detection (try_dquote returned None above)
179
+ if self.patterns.dquote.find(tail).is_none() {
180
+ return Some(Err(TokenizerError::UnclosedQuote {
181
+ quote_char: \'"\',
182
+ opened_at: offset,
183
+ }));
184
+ }
185
+ }
186
+ if !tail.starts_with(\'\\'\') {'''
187
+
188
+ NEW_SQUOTE_START = ''' if !tail.starts_with(\'\\'\') {'''
189
+
# Fix 4: drop the unclosed-" check from try_squote, warning when the exact text
# is not present in the source.
if OLD_SQUOTE_DQUOTE_CHECK not in src:
    print("WARN: check \" in try_squote non trovato — potrebbe essere già rimosso")
else:
    src = src.replace(OLD_SQUOTE_DQUOTE_CHECK, NEW_SQUOTE_START, 1)
    print("✓ Fix 4 applicato: rimosso check \" duplicato da try_squote")
195
+
196
+ with open(path, "w") as f:
197
+ f.write(src)
198
+
199
+ print("\nTutti i fix applicati. Esegui: cargo test 2>&1")
@@ -0,0 +1,44 @@
#!/usr/bin/env python3
"""
Add the Python bindings to src/lib.rs.

Run from ~/gotoken/: python3 patch_lib_python.py
"""

path = "src/lib.rs"

# Rust code inserted immediately after MARKER. It is written to lib.rs verbatim.
BLOCK = '''
// ── Python bindings (compilati solo con --features python) ───────────────────
#[cfg(feature = "python")]
pub mod python;

#[cfg(feature = "python")]
use pyo3::prelude::*;

/// Entry-point del modulo Python `gotoken`.
/// Chiamato automaticamente da maturin quando Python importa la libreria.
#[cfg(feature = "python")]
#[pymodule]
fn gotoken(m: &Bound<\'_, PyModule>) -> PyResult<()> {
    python::gotoken(m)
}
'''

# Line in lib.rs after which BLOCK is inserted.
MARKER = "pub mod vocab;"


def insert_bindings(source):
    """Return ``source`` with BLOCK inserted after the first MARKER.

    Only the first occurrence of MARKER is extended; text that does not
    contain MARKER is returned unchanged.
    """
    return source.replace(MARKER, MARKER + BLOCK, 1)


def main():
    """Patch src/lib.rs in place, then print the follow-up build commands.

    Exits with status 1 (via SystemExit) when MARKER is missing from lib.rs.
    """
    # BLOCK contains non-ASCII characters, so name the encoding for both the
    # read and the write instead of relying on the platform's locale default.
    with open(path, "r", encoding="utf-8") as f:
        src = f.read()

    if "pub mod python;" in src:
        # Already patched: leave the file alone so re-running is harmless.
        print("Binding Python già presenti in lib.rs — nessuna modifica necessaria.")
    else:
        if MARKER not in src:
            print(f"ERRORE: marker '{MARKER}' non trovato in lib.rs")
            raise SystemExit(1)
        with open(path, "w", encoding="utf-8") as f:
            f.write(insert_bindings(src))
        print("✓ Binding Python aggiunti a lib.rs")

    print("Ora esegui:")
    print(" pip install maturin")
    print(" maturin develop --features python,rayon")
    print(" python3 test_gotoken.py")


if __name__ == "__main__":
    main()
@@ -0,0 +1,23 @@
1
+ [build-system]
2
+ requires = ["maturin>=1.5,<2.0"]
3
+ build-backend = "maturin"
4
+
5
+ [project]
6
+ name = "gotoken"
7
+ version = "0.1.0"
8
+ description = "Syntax-aware Bash tokenizer — Rust core, Python bindings"
9
+ requires-python = ">=3.9"
10
+ license = { text = "MIT" }
11
+ classifiers = [
12
+ "Programming Language :: Rust",
13
+ "Programming Language :: Python :: Implementation :: CPython",
14
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
15
+ ]
16
+
17
+ [tool.maturin]
18
+ # Attiva la feature "python" di Cargo che include pyo3
19
+ features = ["python", "rayon"]
20
+ # Funzione di entry-point del modulo PyO3 (deve matchare #[pymodule] fn gotoken)
21
+ module-name = "gotoken"
22
+ # Path to the Cargo manifest of the Rust crate
23
+ manifest-path = "Cargo.toml"