goscript 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. package/cmd/goscript/cmd-compile.go +7 -0
  2. package/cmd/goscript/cmd_compile_test.go +83 -0
  3. package/compiler/compile-request.go +3 -0
  4. package/compiler/compiler-cache.go +828 -0
  5. package/compiler/compiler-cache_test.go +705 -0
  6. package/compiler/config.go +2 -0
  7. package/compiler/index.test.ts +26 -1
  8. package/compiler/index.ts +5 -0
  9. package/compiler/lowered-program.go +31 -20
  10. package/compiler/lowering.go +349 -93
  11. package/compiler/lowering_bench_test.go +1 -0
  12. package/compiler/override-facts.go +309 -8
  13. package/compiler/override-parity-verifier.go +45 -1
  14. package/compiler/override-parity-verifier_test.go +100 -0
  15. package/compiler/override-registry_test.go +1 -0
  16. package/compiler/package-graph.go +40 -12
  17. package/compiler/package-graph_test.go +29 -0
  18. package/compiler/runtime-contract.go +8 -0
  19. package/compiler/service.go +98 -11
  20. package/compiler/skeleton_test.go +110 -14
  21. package/compiler/typescript-emitter.go +120 -23
  22. package/dist/compiler/index.d.ts +2 -0
  23. package/dist/compiler/index.js +3 -0
  24. package/dist/compiler/index.js.map +1 -1
  25. package/dist/gs/builtin/builtin.d.ts +24 -33
  26. package/dist/gs/builtin/builtin.js +54 -61
  27. package/dist/gs/builtin/builtin.js.map +1 -1
  28. package/dist/gs/builtin/hostio.d.ts +1 -0
  29. package/dist/gs/builtin/hostio.js +1 -1
  30. package/dist/gs/builtin/hostio.js.map +1 -1
  31. package/dist/gs/builtin/index.d.ts +1 -0
  32. package/dist/gs/builtin/index.js +1 -0
  33. package/dist/gs/builtin/index.js.map +1 -1
  34. package/dist/gs/builtin/panic.d.ts +18 -0
  35. package/dist/gs/builtin/panic.js +98 -0
  36. package/dist/gs/builtin/panic.js.map +1 -0
  37. package/dist/gs/builtin/slice.d.ts +10 -0
  38. package/dist/gs/builtin/slice.js +110 -53
  39. package/dist/gs/builtin/slice.js.map +1 -1
  40. package/dist/gs/builtin/type.js +15 -3
  41. package/dist/gs/builtin/type.js.map +1 -1
  42. package/dist/gs/builtin/varRef.d.ts +1 -1
  43. package/dist/gs/builtin/varRef.js +3 -2
  44. package/dist/gs/builtin/varRef.js.map +1 -1
  45. package/dist/gs/bytes/bytes.gs.js +51 -38
  46. package/dist/gs/bytes/bytes.gs.js.map +1 -1
  47. package/dist/gs/bytes/reader.gs.d.ts +1 -1
  48. package/dist/gs/bytes/reader.gs.js +6 -7
  49. package/dist/gs/bytes/reader.gs.js.map +1 -1
  50. package/dist/gs/cmp/index.d.ts +1 -1
  51. package/dist/gs/cmp/index.js +43 -10
  52. package/dist/gs/cmp/index.js.map +1 -1
  53. package/dist/gs/context/context.d.ts +2 -2
  54. package/dist/gs/context/context.js +1 -1
  55. package/dist/gs/context/context.js.map +1 -1
  56. package/dist/gs/embed/index.js +1 -1
  57. package/dist/gs/embed/index.js.map +1 -1
  58. package/dist/gs/encoding/binary/index.js +201 -8
  59. package/dist/gs/encoding/binary/index.js.map +1 -1
  60. package/dist/gs/encoding/json/index.d.ts +5 -0
  61. package/dist/gs/encoding/json/index.js +388 -25
  62. package/dist/gs/encoding/json/index.js.map +1 -1
  63. package/dist/gs/errors/errors.js +17 -24
  64. package/dist/gs/errors/errors.js.map +1 -1
  65. package/dist/gs/fmt/fmt.js +129 -35
  66. package/dist/gs/fmt/fmt.js.map +1 -1
  67. package/dist/gs/golang.org/x/crypto/cryptobyte/index.js +1 -1
  68. package/dist/gs/golang.org/x/crypto/cryptobyte/index.js.map +1 -1
  69. package/dist/gs/internal/bytealg/index.js +43 -8
  70. package/dist/gs/internal/bytealg/index.js.map +1 -1
  71. package/dist/gs/internal/byteorder/index.d.ts +2 -2
  72. package/dist/gs/internal/byteorder/index.js +2 -2
  73. package/dist/gs/internal/byteorder/index.js.map +1 -1
  74. package/dist/gs/io/fs/format.js +2 -2
  75. package/dist/gs/io/fs/format.js.map +1 -1
  76. package/dist/gs/io/fs/fs.d.ts +1 -1
  77. package/dist/gs/io/fs/fs.js +1 -1
  78. package/dist/gs/io/fs/fs.js.map +1 -1
  79. package/dist/gs/io/io.d.ts +21 -21
  80. package/dist/gs/io/io.js +49 -50
  81. package/dist/gs/io/io.js.map +1 -1
  82. package/dist/gs/math/bits/index.js +26 -8
  83. package/dist/gs/math/bits/index.js.map +1 -1
  84. package/dist/gs/math/copysign.gs.js +10 -17
  85. package/dist/gs/math/copysign.gs.js.map +1 -1
  86. package/dist/gs/math/pow.gs.js +5 -0
  87. package/dist/gs/math/pow.gs.js.map +1 -1
  88. package/dist/gs/math/signbit.gs.js +6 -2
  89. package/dist/gs/math/signbit.gs.js.map +1 -1
  90. package/dist/gs/mime/index.js +1 -0
  91. package/dist/gs/mime/index.js.map +1 -1
  92. package/dist/gs/net/http/index.d.ts +6 -6
  93. package/dist/gs/net/http/index.js +507 -43
  94. package/dist/gs/net/http/index.js.map +1 -1
  95. package/dist/gs/os/stat.gs.d.ts +2 -2
  96. package/dist/gs/os/types.gs.d.ts +1 -1
  97. package/dist/gs/os/types.gs.js +1 -1
  98. package/dist/gs/os/types.gs.js.map +1 -1
  99. package/dist/gs/os/types_js.gs.d.ts +1 -1
  100. package/dist/gs/os/types_js.gs.js +7 -7
  101. package/dist/gs/os/types_js.gs.js.map +1 -1
  102. package/dist/gs/os/types_unix.gs.d.ts +1 -1
  103. package/dist/gs/os/types_unix.gs.js +1 -1
  104. package/dist/gs/os/types_unix.gs.js.map +1 -1
  105. package/dist/gs/os/zero_copy_posix.gs.d.ts +1 -1
  106. package/dist/gs/os/zero_copy_posix.gs.js +1 -1
  107. package/dist/gs/os/zero_copy_posix.gs.js.map +1 -1
  108. package/dist/gs/path/filepath/match.js +8 -4
  109. package/dist/gs/path/filepath/match.js.map +1 -1
  110. package/dist/gs/path/filepath/path.js +216 -42
  111. package/dist/gs/path/filepath/path.js.map +1 -1
  112. package/dist/gs/path/match.js +6 -3
  113. package/dist/gs/path/match.js.map +1 -1
  114. package/dist/gs/reflect/type.d.ts +5 -4
  115. package/dist/gs/reflect/type.js +29 -11
  116. package/dist/gs/reflect/type.js.map +1 -1
  117. package/dist/gs/slices/slices.js +11 -11
  118. package/dist/gs/slices/slices.js.map +1 -1
  119. package/dist/gs/strconv/atof.gs.js +156 -43
  120. package/dist/gs/strconv/atof.gs.js.map +1 -1
  121. package/dist/gs/strconv/atoi.gs.d.ts +3 -2
  122. package/dist/gs/strconv/atoi.gs.js +86 -67
  123. package/dist/gs/strconv/atoi.gs.js.map +1 -1
  124. package/dist/gs/strconv/ftoa.gs.js +73 -3
  125. package/dist/gs/strconv/ftoa.gs.js.map +1 -1
  126. package/dist/gs/strconv/itoa.gs.d.ts +4 -4
  127. package/dist/gs/strconv/itoa.gs.js +5 -4
  128. package/dist/gs/strconv/itoa.gs.js.map +1 -1
  129. package/dist/gs/strconv/quote.gs.d.ts +1 -1
  130. package/dist/gs/strconv/quote.gs.js +311 -103
  131. package/dist/gs/strconv/quote.gs.js.map +1 -1
  132. package/dist/gs/strings/reader.d.ts +1 -1
  133. package/dist/gs/strings/reader.js +8 -8
  134. package/dist/gs/strings/reader.js.map +1 -1
  135. package/dist/gs/strings/strings.js +87 -61
  136. package/dist/gs/strings/strings.js.map +1 -1
  137. package/dist/gs/sync/atomic/doc_64.gs.d.ts +14 -14
  138. package/dist/gs/sync/atomic/doc_64.gs.js +10 -10
  139. package/dist/gs/sync/atomic/doc_64.gs.js.map +1 -1
  140. package/dist/gs/sync/atomic/type.gs.d.ts +22 -22
  141. package/dist/gs/sync/atomic/type.gs.js +4 -4
  142. package/dist/gs/sync/atomic/type.gs.js.map +1 -1
  143. package/dist/gs/sync/sync.js +50 -12
  144. package/dist/gs/sync/sync.js.map +1 -1
  145. package/dist/gs/syscall/fs.d.ts +6 -6
  146. package/dist/gs/syscall/fs.js +1 -1
  147. package/dist/gs/syscall/fs.js.map +1 -1
  148. package/dist/gs/time/time.d.ts +18 -18
  149. package/dist/gs/time/time.js +58 -55
  150. package/dist/gs/time/time.js.map +1 -1
  151. package/dist/gs/unicode/tables.d.ts +11 -0
  152. package/dist/gs/unicode/tables.js +635 -0
  153. package/dist/gs/unicode/tables.js.map +1 -0
  154. package/dist/gs/unicode/unicode.d.ts +58 -38
  155. package/dist/gs/unicode/unicode.js +362 -278
  156. package/dist/gs/unicode/unicode.js.map +1 -1
  157. package/go.sum +13 -0
  158. package/gs/builtin/builtin.ts +83 -93
  159. package/gs/builtin/hostio.ts +1 -1
  160. package/gs/builtin/index.ts +1 -0
  161. package/gs/builtin/panic.test.ts +189 -0
  162. package/gs/builtin/panic.ts +107 -0
  163. package/gs/builtin/runtime-contract.test.ts +5 -5
  164. package/gs/builtin/slice.test.ts +23 -0
  165. package/gs/builtin/slice.ts +133 -95
  166. package/gs/builtin/type.ts +16 -3
  167. package/gs/builtin/varRef.ts +4 -2
  168. package/gs/builtin/wide-int.test.ts +41 -0
  169. package/gs/bytes/bytes.gs.ts +54 -41
  170. package/gs/bytes/bytes.test.ts +18 -1
  171. package/gs/bytes/reader.gs.ts +7 -8
  172. package/gs/cmp/index.test.ts +55 -0
  173. package/gs/cmp/index.ts +45 -9
  174. package/gs/context/context.ts +3 -3
  175. package/gs/embed/index.ts +2 -2
  176. package/gs/encoding/binary/index.test.ts +104 -0
  177. package/gs/encoding/binary/index.ts +259 -11
  178. package/gs/encoding/json/index.test.ts +107 -0
  179. package/gs/encoding/json/index.ts +400 -29
  180. package/gs/errors/errors.test.ts +44 -1
  181. package/gs/errors/errors.ts +15 -31
  182. package/gs/fmt/fmt.test.ts +70 -2
  183. package/gs/fmt/fmt.ts +128 -34
  184. package/gs/golang.org/x/crypto/cryptobyte/index.ts +1 -1
  185. package/gs/internal/bytealg/index.test.ts +26 -1
  186. package/gs/internal/bytealg/index.ts +44 -8
  187. package/gs/internal/byteorder/index.ts +6 -4
  188. package/gs/io/fs/format.ts +2 -2
  189. package/gs/io/fs/fs.ts +2 -2
  190. package/gs/io/fs/stat.test.ts +2 -2
  191. package/gs/io/fs/sub.test.ts +2 -2
  192. package/gs/io/fs/walk.test.ts +2 -2
  193. package/gs/io/io.test.ts +47 -5
  194. package/gs/io/io.ts +73 -73
  195. package/gs/io/limit.test.ts +103 -0
  196. package/gs/math/bits/index.test.ts +128 -0
  197. package/gs/math/bits/index.ts +26 -8
  198. package/gs/math/copysign.gs.test.ts +3 -1
  199. package/gs/math/copysign.gs.ts +10 -22
  200. package/gs/math/pow.gs.test.ts +4 -5
  201. package/gs/math/pow.gs.ts +5 -0
  202. package/gs/math/signbit.gs.test.ts +2 -1
  203. package/gs/math/signbit.gs.ts +6 -3
  204. package/gs/mime/index.ts +1 -0
  205. package/gs/net/http/index.test.ts +683 -2
  206. package/gs/net/http/index.ts +598 -57
  207. package/gs/net/http/meta.json +3 -0
  208. package/gs/os/stat.gs.ts +2 -2
  209. package/gs/os/types.gs.ts +2 -2
  210. package/gs/os/types_js.gs.ts +9 -9
  211. package/gs/os/types_unix.gs.ts +2 -2
  212. package/gs/os/zero_copy_posix.gs.ts +2 -2
  213. package/gs/path/filepath/match.test.ts +16 -0
  214. package/gs/path/filepath/match.ts +8 -4
  215. package/gs/path/filepath/path.test.ts +91 -9
  216. package/gs/path/filepath/path.ts +223 -49
  217. package/gs/path/match.test.ts +32 -0
  218. package/gs/path/match.ts +6 -3
  219. package/gs/reflect/deepequal.test.ts +1 -1
  220. package/gs/reflect/field.test.ts +1 -1
  221. package/gs/reflect/function-types.test.ts +6 -6
  222. package/gs/reflect/sliceat.test.ts +13 -13
  223. package/gs/reflect/structof.test.ts +4 -4
  224. package/gs/reflect/type.ts +34 -14
  225. package/gs/reflect/typefor.test.ts +5 -5
  226. package/gs/runtime/pprof/index.test.ts +20 -0
  227. package/gs/runtime/trace/index.test.ts +3 -0
  228. package/gs/slices/slices.test.ts +31 -0
  229. package/gs/slices/slices.ts +11 -11
  230. package/gs/strconv/append.test.ts +99 -0
  231. package/gs/strconv/atof.gs.ts +156 -42
  232. package/gs/strconv/atof.test.ts +45 -0
  233. package/gs/strconv/atoi.gs.ts +87 -69
  234. package/gs/strconv/atoi.test.ts +49 -0
  235. package/gs/strconv/ftoa.gs.ts +85 -10
  236. package/gs/strconv/ftoa.test.ts +43 -0
  237. package/gs/strconv/itoa.gs.ts +10 -9
  238. package/gs/strconv/quote.gs.ts +335 -108
  239. package/gs/strconv/quote.test.ts +111 -0
  240. package/gs/strings/reader.test.ts +10 -10
  241. package/gs/strings/reader.ts +9 -9
  242. package/gs/strings/strings.test.ts +18 -5
  243. package/gs/strings/strings.ts +81 -68
  244. package/gs/sync/atomic/doc_64.gs.ts +24 -24
  245. package/gs/sync/atomic/doc_64.test.ts +5 -5
  246. package/gs/sync/atomic/type.gs.ts +28 -28
  247. package/gs/sync/sync.test.ts +109 -1
  248. package/gs/sync/sync.ts +46 -12
  249. package/gs/syscall/fs.ts +8 -8
  250. package/gs/syscall/net.test.ts +1 -1
  251. package/gs/time/parse.test.ts +45 -0
  252. package/gs/time/time.test.ts +46 -23
  253. package/gs/time/time.ts +69 -66
  254. package/gs/unicode/gen.go +198 -0
  255. package/gs/unicode/tables.ts +646 -0
  256. package/gs/unicode/unicode.test.ts +69 -0
  257. package/gs/unicode/unicode.ts +396 -312
  258. package/package.json +1 -1
  259. package/dist/gs/github.com/aperturerobotics/util/conc/index.d.ts +0 -20
  260. package/dist/gs/github.com/aperturerobotics/util/conc/index.js +0 -134
  261. package/dist/gs/github.com/aperturerobotics/util/conc/index.js.map +0 -1
  262. package/gs/github.com/aperturerobotics/util/conc/index.test.ts +0 -30
  263. package/gs/github.com/aperturerobotics/util/conc/index.ts +0 -172
  264. package/gs/github.com/aperturerobotics/util/conc/meta.json +0 -9
@@ -1,5 +1,19 @@
1
1
  import type { Slice } from '@goscript/builtin/index.js'
2
2
 
3
+ import {
4
+ categoryData,
5
+ scriptData,
6
+ propertyData,
7
+ foldCategoryData,
8
+ foldScriptData,
9
+ caseRangeData,
10
+ turkishCaseData,
11
+ asciiFold,
12
+ caseOrbitData,
13
+ latin1Props,
14
+ type RangeData,
15
+ } from './tables.js'
16
+
3
17
  // Package unicode provides data and functions to test some properties of Unicode code points.
4
18
 
5
19
  // Constants
@@ -15,8 +29,12 @@ export const LowerCase = 1
15
29
  export const TitleCase = 2
16
30
  export const MaxCase = 3
17
31
 
32
+ // UpperLower is the delta sentinel marking an upper/lower alternating CaseRange.
18
33
  export const UpperLower = MaxRune + 1
19
34
 
35
+ // linearMax is the maximum size table for linear search for non-Latin1 rune.
36
+ const linearMax = 18
37
+
20
38
  // Range16 represents a range of 16-bit Unicode code points
21
39
  type Range16Init = {
22
40
  Lo?: number
@@ -156,369 +174,435 @@ export class CaseRange {
156
174
  }
157
175
  }
158
176
 
159
- // SpecialCase represents language-specific case mappings
160
- export type SpecialCase = CaseRange[]
177
+ // SpecialCase represents language-specific case mappings such as Turkish.
178
+ // It carries the language's CaseRange overrides and falls back to the package
179
+ // case mappings when a rune is not covered, matching Go's SpecialCase methods.
180
+ export class SpecialCase {
181
+ public ranges: CaseRange[]
161
182
 
162
- // Basic character classification functions using JavaScript's built-in Unicode support
183
+ constructor(ranges: CaseRange[] = []) {
184
+ this.ranges = ranges
185
+ }
163
186
 
164
- // IsControl reports whether the rune is a control character
165
- export function IsControl(r: number): boolean {
166
- // Control characters are in categories Cc, Cf, Co, Cs
167
- if (r < 0 || r > MaxRune) return false
168
- const char = String.fromCodePoint(r)
169
- // Use regex to match control characters
170
- return /[\p{Cc}\p{Cf}\p{Co}\p{Cs}]/u.test(char)
187
+ public ToUpper(r: number): number {
188
+ let [r1, hadMapping] = to(UpperCase, r, this.ranges)
189
+ if (r1 === r && !hadMapping) {
190
+ r1 = ToUpper(r)
191
+ }
192
+ return r1
193
+ }
194
+
195
+ public ToTitle(r: number): number {
196
+ let [r1, hadMapping] = to(TitleCase, r, this.ranges)
197
+ if (r1 === r && !hadMapping) {
198
+ r1 = ToTitle(r)
199
+ }
200
+ return r1
201
+ }
202
+
203
+ public ToLower(r: number): number {
204
+ let [r1, hadMapping] = to(LowerCase, r, this.ranges)
205
+ if (r1 === r && !hadMapping) {
206
+ r1 = ToLower(r)
207
+ }
208
+ return r1
209
+ }
171
210
  }
172
211
 
173
- // IsDigit reports whether the rune is a decimal digit
174
- export function IsDigit(r: number): boolean {
175
- if (r < 0 || r > MaxRune) return false
176
- const char = String.fromCodePoint(r)
177
- return /\p{Nd}/u.test(char)
212
+ // Table construction from the generated pure-numeric tables.
213
+
214
+ function buildRangeTable(d: RangeData): RangeTable {
215
+ return new RangeTable(
216
+ d[0].map((t) => new Range16(t[0], t[1], t[2])),
217
+ d[1].map((t) => new Range32(t[0], t[1], t[2])),
218
+ d[2],
219
+ )
178
220
  }
179
221
 
180
- // IsGraphic reports whether the rune is defined as a Graphic by Unicode
181
- export function IsGraphic(r: number): boolean {
182
- if (r < 0 || r > MaxRune) return false
183
- return IsLetter(r) || IsMark(r) || IsNumber(r) || IsPunct(r) || IsSymbol(r)
222
+ function buildTableMap(data: Record<string, RangeData>): Map<string, RangeTable> {
223
+ const out = new Map<string, RangeTable>()
224
+ for (const key of Object.keys(data)) {
225
+ out.set(key, buildRangeTable(data[key]))
226
+ }
227
+ return out
184
228
  }
185
229
 
186
- // IsLetter reports whether the rune is a letter (category L)
187
- export function IsLetter(r: number): boolean {
188
- if (r < 0 || r > MaxRune) return false
189
- const char = String.fromCodePoint(r)
190
- return /\p{L}/u.test(char)
230
+ function buildCaseRanges(
231
+ data: Array<[number, number, number, number, number]>,
232
+ ): CaseRange[] {
233
+ return data.map((d) => new CaseRange(d[0], d[1], [d[2], d[3], d[4]]))
191
234
  }
192
235
 
193
- // IsLower reports whether the rune is a lower case letter
194
- export function IsLower(r: number): boolean {
195
- if (r < 0 || r > MaxRune) return false
196
- const char = String.fromCodePoint(r)
197
- return /\p{Ll}/u.test(char)
236
+ // Categories is the set of Unicode general category tables keyed by name.
237
+ export const Categories = buildTableMap(categoryData)
238
+ export const Scripts = buildTableMap(scriptData)
239
+ export const Properties = buildTableMap(propertyData)
240
+ export const FoldCategory = buildTableMap(foldCategoryData)
241
+ export const FoldScript = buildTableMap(foldScriptData)
242
+
243
+ // CategoryAliases maps alternate category names to their canonical names.
244
+ export const CategoryAliases = new Map<string, string>([
245
+ ['Other', 'C'],
246
+ ['cntrl', 'Cc'],
247
+ ['Letter', 'L'],
248
+ ['Mark', 'M'],
249
+ ['Number', 'N'],
250
+ ['Punctuation', 'P'],
251
+ ['Symbol', 'S'],
252
+ ['Separator', 'Z'],
253
+ ['digit', 'Nd'],
254
+ ])
255
+
256
+ // Named general category tables.
257
+ export const C = Categories.get('C')!
258
+ export const Cc = Categories.get('Cc')!
259
+ export const Cf = Categories.get('Cf')!
260
+ export const Cn = Categories.get('Cn')!
261
+ export const Co = Categories.get('Co')!
262
+ export const Cs = Categories.get('Cs')!
263
+ export const L = Categories.get('L')!
264
+ export const LC = Categories.get('LC')!
265
+ export const Ll = Categories.get('Ll')!
266
+ export const Lm = Categories.get('Lm')!
267
+ export const Lo = Categories.get('Lo')!
268
+ export const Lt = Categories.get('Lt')!
269
+ export const Lu = Categories.get('Lu')!
270
+ export const M = Categories.get('M')!
271
+ export const Mc = Categories.get('Mc')!
272
+ export const Me = Categories.get('Me')!
273
+ export const Mn = Categories.get('Mn')!
274
+ export const N = Categories.get('N')!
275
+ export const Nd = Categories.get('Nd')!
276
+ export const Nl = Categories.get('Nl')!
277
+ export const No = Categories.get('No')!
278
+ export const P = Categories.get('P')!
279
+ export const Pc = Categories.get('Pc')!
280
+ export const Pd = Categories.get('Pd')!
281
+ export const Pe = Categories.get('Pe')!
282
+ export const Pf = Categories.get('Pf')!
283
+ export const Pi = Categories.get('Pi')!
284
+ export const Po = Categories.get('Po')!
285
+ export const Ps = Categories.get('Ps')!
286
+ export const S = Categories.get('S')!
287
+ export const Sc = Categories.get('Sc')!
288
+ export const Sk = Categories.get('Sk')!
289
+ export const Sm = Categories.get('Sm')!
290
+ export const So = Categories.get('So')!
291
+ export const Z = Categories.get('Z')!
292
+ export const Zl = Categories.get('Zl')!
293
+ export const Zp = Categories.get('Zp')!
294
+ export const Zs = Categories.get('Zs')!
295
+
296
+ // Friendly category aliases matching Go's exported names.
297
+ export const Letter = L
298
+ export const Mark = M
299
+ export const Number = N
300
+ export const Other = C
301
+ export const Punct = P
302
+ export const Space = Z
303
+ export const Symbol = S
304
+ export const Digit = Nd
305
+ export const Lower = Ll
306
+ export const Title = Lt
307
+ export const Upper = Lu
308
+
309
+ // White_Space is the Unicode property table used for non-Latin1 IsSpace.
310
+ export const White_Space = Properties.get('White_Space')!
311
+
312
+ // GraphicRanges defines the set of graphic characters according to Unicode.
313
+ export const GraphicRanges: RangeTable[] = [L, M, N, P, S, Zs]
314
+
315
+ // PrintRanges defines the set of printable characters according to Go.
316
+ export const PrintRanges: RangeTable[] = [L, M, N, P, S]
317
+
318
+ // CaseRanges is the table of Unicode case mappings.
319
+ export const CaseRanges: CaseRange[] = buildCaseRanges(caseRangeData)
320
+
321
+ // TurkishCase / AzeriCase are the Turkish (and Azeri) special case mappings.
322
+ export const TurkishCase = new SpecialCase(buildCaseRanges(turkishCaseData))
323
+ export const AzeriCase = TurkishCase
324
+
325
+ // caseOrbit maps a rune to the next rune in its simple-fold orbit when that next
326
+ // rune differs from the plain case toggle. SimpleFold consults it above ASCII.
327
+ const caseOrbit = new Map<number, number>(caseOrbitData)
328
+
329
+ // searchRanges reports whether r is contained in the sorted range list, using a
330
+ // linear scan for short tables or Latin1 runes and a binary search otherwise.
331
+ function searchRanges(ranges: Array<Range16 | Range32>, r: number): boolean {
332
+ if (ranges.length <= linearMax || r <= MaxLatin1) {
333
+ for (const range of ranges) {
334
+ if (r < range.Lo) {
335
+ return false
336
+ }
337
+ if (r <= range.Hi) {
338
+ return range.Stride === 1 || (r - range.Lo) % range.Stride === 0
339
+ }
340
+ }
341
+ return false
342
+ }
343
+ let lo = 0
344
+ let hi = ranges.length
345
+ while (lo < hi) {
346
+ const m = lo + ((hi - lo) >> 1)
347
+ const range = ranges[m]
348
+ if (range.Lo <= r && r <= range.Hi) {
349
+ return range.Stride === 1 || (r - range.Lo) % range.Stride === 0
350
+ }
351
+ if (r < range.Lo) {
352
+ hi = m
353
+ } else {
354
+ lo = m + 1
355
+ }
356
+ }
357
+ return false
198
358
  }
199
359
 
200
- // IsMark reports whether the rune is a mark character (category M)
201
- export function IsMark(r: number): boolean {
202
- if (r < 0 || r > MaxRune) return false
203
- const char = String.fromCodePoint(r)
204
- return /\p{M}/u.test(char)
360
+ // Is reports whether the rune is in the specified table of ranges.
361
+ export function Is(rangeTab: RangeTable, r: number): boolean {
362
+ const r16 = rangeTab.R16
363
+ // Compare as unsigned to correctly reject negative runes.
364
+ if (r16.length > 0 && (r >>> 0) <= (r16[r16.length - 1].Hi >>> 0)) {
365
+ return searchRanges(r16, r & 0xffff)
366
+ }
367
+ const r32 = rangeTab.R32
368
+ if (r32.length > 0 && r >= r32[0].Lo) {
369
+ return searchRanges(r32, r)
370
+ }
371
+ return false
205
372
  }
206
373
 
207
- // IsNumber reports whether the rune is a number (category N)
208
- export function IsNumber(r: number): boolean {
209
- if (r < 0 || r > MaxRune) return false
210
- const char = String.fromCodePoint(r)
211
- return /\p{N}/u.test(char)
374
+ // In reports whether the rune is a member of one of the ranges.
375
+ export function In(r: number, ...ranges: RangeTable[]): boolean {
376
+ for (const rangeTab of ranges) {
377
+ if (Is(rangeTab, r)) {
378
+ return true
379
+ }
380
+ }
381
+ return false
212
382
  }
213
383
 
214
- // IsPrint reports whether the rune is defined as printable by Go
215
- export function IsPrint(r: number): boolean {
216
- if (r < 0 || r > MaxRune) return false
217
- if (IsGraphic(r)) return true
218
- return r === 0x20 // space character
384
+ // IsOneOf reports whether the rune is a member of one of the ranges.
385
+ export function IsOneOf(ranges: RangeTable[], r: number): boolean {
386
+ for (const rangeTab of ranges) {
387
+ if (Is(rangeTab, r)) {
388
+ return true
389
+ }
390
+ }
391
+ return false
219
392
  }
220
393
 
221
- // IsPunct reports whether the rune is a punctuation character (category P)
222
- export function IsPunct(r: number): boolean {
223
- if (r < 0 || r > MaxRune) return false
224
- const char = String.fromCodePoint(r)
225
- return /\p{P}/u.test(char)
394
+ // to maps the rune using the specified case and case-range table, returning the
395
+ // mapped rune and whether a mapping was found.
396
+ function to(_case: number, r: number, caseRange: CaseRange[]): [number, boolean] {
397
+ if (_case < 0 || _case >= MaxCase) {
398
+ return [ReplacementChar, false]
399
+ }
400
+ let lo = 0
401
+ let hi = caseRange.length
402
+ while (lo < hi) {
403
+ const m = lo + ((hi - lo) >> 1)
404
+ const cr = caseRange[m]
405
+ if (cr.Lo <= r && r <= cr.Hi) {
406
+ const delta = cr.Delta[_case]
407
+ if (delta > MaxRune) {
408
+ // In an upper/lower alternating sequence the even offsets from the start
409
+ // are upper case and the odd offsets lower; clearing or setting the low
410
+ // bit of the offset selects the right case.
411
+ return [cr.Lo + (((r - cr.Lo) & ~1) | (_case & 1)), true]
412
+ }
413
+ return [r + delta, true]
414
+ }
415
+ if (r < cr.Lo) {
416
+ hi = m
417
+ } else {
418
+ lo = m + 1
419
+ }
420
+ }
421
+ return [r, false]
226
422
  }
227
423
 
228
- // IsSpace reports whether the rune is a space character
229
- export function IsSpace(r: number): boolean {
230
- if (r < 0 || r > MaxRune) return false
231
- const char = String.fromCodePoint(r)
232
- return /\s/u.test(char) || /\p{Z}/u.test(char)
424
+ // To maps the rune to the specified case: UpperCase, LowerCase, or TitleCase.
425
+ export function To(_case: number, r: number): number {
426
+ return to(_case, r, CaseRanges)[0]
233
427
  }
234
428
 
235
- // IsSymbol reports whether the rune is a symbol character (category S)
236
- export function IsSymbol(r: number): boolean {
237
- if (r < 0 || r > MaxRune) return false
238
- const char = String.fromCodePoint(r)
239
- return /\p{S}/u.test(char)
429
+ // ToUpper maps the rune to upper case.
430
+ export function ToUpper(r: number): number {
431
+ if (r <= MaxASCII) {
432
+ if (r >= 0x61 && r <= 0x7a) {
433
+ r -= 0x20
434
+ }
435
+ return r
436
+ }
437
+ return To(UpperCase, r)
240
438
  }
241
439
 
242
- // IsTitle reports whether the rune is a title case letter
243
- export function IsTitle(r: number): boolean {
244
- if (r < 0 || r > MaxRune) return false
245
- const char = String.fromCodePoint(r)
246
- return /\p{Lt}/u.test(char)
440
+ // ToLower maps the rune to lower case.
441
+ export function ToLower(r: number): number {
442
+ if (r <= MaxASCII) {
443
+ if (r >= 0x41 && r <= 0x5a) {
444
+ r += 0x20
445
+ }
446
+ return r
447
+ }
448
+ return To(LowerCase, r)
247
449
  }
248
450
 
249
- // IsUpper reports whether the rune is an upper case letter
250
- export function IsUpper(r: number): boolean {
251
- if (r < 0 || r > MaxRune) return false
252
- const char = String.fromCodePoint(r)
253
- return /\p{Lu}/u.test(char)
451
+ // ToTitle maps the rune to title case.
452
+ export function ToTitle(r: number): number {
453
+ if (r <= MaxASCII) {
454
+ if (r >= 0x61 && r <= 0x7a) {
455
+ r -= 0x20
456
+ }
457
+ return r
458
+ }
459
+ return To(TitleCase, r)
254
460
  }
255
461
 
256
- // Case conversion functions
462
+ // SimpleFold iterates over the Unicode code points equivalent under simple case
463
+ // folding, returning the next rune in the fold orbit.
464
+ export function SimpleFold(r: number): number {
465
+ if (r < 0 || r > MaxRune) {
466
+ return r
467
+ }
468
+ if (r < asciiFold.length) {
469
+ return asciiFold[r]
470
+ }
471
+ const orbit = caseOrbit.get(r)
472
+ if (orbit !== undefined) {
473
+ return orbit
474
+ }
475
+ const l = ToLower(r)
476
+ if (l !== r) {
477
+ return l
478
+ }
479
+ return ToUpper(r)
480
+ }
257
481
 
258
- // ToLower returns the lowercase mapping of the rune
259
- export function ToLower(r: number): number {
260
- if (r < 0 || r > MaxRune) return r
261
- const char = String.fromCodePoint(r)
262
- const lower = char.toLowerCase()
263
- return lower.codePointAt(0) || r
482
+ // Latin1 property bits, kept in sync with the l1* constants in gen.go. The
483
+ // latin1Props table records Go's predicate results for U+0000..U+00FF so the
484
+ // Latin1 fast paths stay byte-faithful without re-deriving Go's property masks.
485
+ const L1_CONTROL = 1 << 0
486
+ const L1_LETTER = 1 << 1
487
+ const L1_UPPER = 1 << 2
488
+ const L1_LOWER = 1 << 3
489
+ const L1_TITLE = 1 << 4
490
+ const L1_NUMBER = 1 << 5
491
+ const L1_DIGIT = 1 << 6
492
+ const L1_MARK = 1 << 7
493
+ const L1_PUNCT = 1 << 8
494
+ const L1_SYMBOL = 1 << 9
495
+ const L1_SPACE = 1 << 10
496
+ const L1_GRAPHIC = 1 << 11
497
+ const L1_PRINT = 1 << 12
498
+
499
+ function isLatin1(r: number): boolean {
500
+ return r >= 0 && r <= MaxLatin1
264
501
  }
265
502
 
266
- // ToUpper returns the uppercase mapping of the rune
267
- export function ToUpper(r: number): number {
268
- if (r < 0 || r > MaxRune) return r
269
- const char = String.fromCodePoint(r)
270
- const upper = char.toUpperCase()
271
- return upper.codePointAt(0) || r
503
+ // IsControl reports whether the rune is a control character. Such characters do
504
+ // not appear above Latin1.
505
+ export function IsControl(r: number): boolean {
506
+ if (isLatin1(r)) {
507
+ return (latin1Props[r] & L1_CONTROL) !== 0
508
+ }
509
+ return false
272
510
  }
273
511
 
274
- // ToTitle returns the title case mapping of the rune
275
- export function ToTitle(r: number): number {
276
- // For most characters, title case is the same as uppercase
277
- return ToUpper(r)
512
+ // IsDigit reports whether the rune is a decimal digit.
513
+ export function IsDigit(r: number): boolean {
514
+ if (isLatin1(r)) {
515
+ return (latin1Props[r] & L1_DIGIT) !== 0
516
+ }
517
+ return Is(Digit, r)
278
518
  }
279
519
 
280
- // To returns the case mapping of the rune
281
- export function To(_case: number, r: number): number {
282
- switch (_case) {
283
- case UpperCase:
284
- return ToUpper(r)
285
- case LowerCase:
286
- return ToLower(r)
287
- case TitleCase:
288
- return ToTitle(r)
289
- default:
290
- return r
520
+ // IsLetter reports whether the rune is a letter (category L).
521
+ export function IsLetter(r: number): boolean {
522
+ if (isLatin1(r)) {
523
+ return (latin1Props[r] & L1_LETTER) !== 0
291
524
  }
525
+ return Is(Letter, r)
292
526
  }
293
527
 
294
- // SimpleFold returns the next rune in the simple case folding sequence
295
- export function SimpleFold(r: number): number {
296
- if (r < 0 || r > MaxRune) return r
528
+ // IsNumber reports whether the rune is a number (category N).
529
+ export function IsNumber(r: number): boolean {
530
+ if (isLatin1(r)) {
531
+ return (latin1Props[r] & L1_NUMBER) !== 0
532
+ }
533
+ return Is(Number, r)
534
+ }
297
535
 
298
- // Simple implementation - just toggle between upper and lower case
299
- if (IsUpper(r)) {
300
- return ToLower(r)
301
- } else if (IsLower(r)) {
302
- return ToUpper(r)
536
+ // IsMark reports whether the rune is a mark character (category M).
537
+ export function IsMark(r: number): boolean {
538
+ if (isLatin1(r)) {
539
+ return (latin1Props[r] & L1_MARK) !== 0
303
540
  }
304
- return r
541
+ return Is(Mark, r)
305
542
  }
306
543
 
307
- // Is reports whether the rune is in the specified table of ranges
308
- export function Is(rangeTab: RangeTable, r: number): boolean {
309
- if (r < 0 || r > MaxRune) return false
544
+ // IsSpace reports whether the rune is a space character as defined by Unicode.
545
+ export function IsSpace(r: number): boolean {
546
+ if (isLatin1(r)) {
547
+ return (latin1Props[r] & L1_SPACE) !== 0
548
+ }
549
+ return Is(White_Space, r)
550
+ }
310
551
 
311
- // Check 16-bit ranges
312
- for (const range of rangeTab.R16) {
313
- if (r < range.Lo) break
314
- if (r <= range.Hi) {
315
- return range.Stride === 1 || (r - range.Lo) % range.Stride === 0
316
- }
552
+ // IsPunct reports whether the rune is a punctuation character (category P).
553
+ export function IsPunct(r: number): boolean {
554
+ if (isLatin1(r)) {
555
+ return (latin1Props[r] & L1_PUNCT) !== 0
317
556
  }
557
+ return Is(Punct, r)
558
+ }
318
559
 
319
- // Check 32-bit ranges
320
- for (const range of rangeTab.R32) {
321
- if (r < range.Lo) break
322
- if (r <= range.Hi) {
323
- return range.Stride === 1 || (r - range.Lo) % range.Stride === 0
324
- }
560
+ // IsSymbol reports whether the rune is a symbolic character (category S).
561
+ export function IsSymbol(r: number): boolean {
562
+ if (isLatin1(r)) {
563
+ return (latin1Props[r] & L1_SYMBOL) !== 0
325
564
  }
565
+ return Is(Symbol, r)
566
+ }
326
567
 
327
- return false
568
+ // IsUpper reports whether the rune is an upper case letter.
569
+ export function IsUpper(r: number): boolean {
570
+ if (isLatin1(r)) {
571
+ return (latin1Props[r] & L1_UPPER) !== 0
572
+ }
573
+ return Is(Upper, r)
328
574
  }
329
575
 
330
- // In reports whether the rune is a member of one of the ranges
331
- export function In(r: number, ...ranges: RangeTable[]): boolean {
332
- for (const rangeTab of ranges) {
333
- if (Is(rangeTab, r)) {
334
- return true
335
- }
576
+ // IsLower reports whether the rune is a lower case letter.
577
+ export function IsLower(r: number): boolean {
578
+ if (isLatin1(r)) {
579
+ return (latin1Props[r] & L1_LOWER) !== 0
336
580
  }
337
- return false
581
+ return Is(Lower, r)
338
582
  }
339
583
 
340
- // IsOneOf reports whether the rune is a member of one of the ranges
341
- export function IsOneOf(ranges: RangeTable[], r: number): boolean {
342
- return In(r, ...ranges)
343
- }
344
-
345
- // Predefined range tables for common character categories
346
- // These are simplified versions - in a full implementation, these would contain
347
- // the complete Unicode range data
348
-
349
- export const Letter = new RangeTable(
350
- [new Range16(0x0041, 0x005a, 1), new Range16(0x0061, 0x007a, 1)], // Basic Latin letters
351
- [],
352
- )
353
-
354
- export const Digit = new RangeTable(
355
- [new Range16(0x0030, 0x0039, 1)], // ASCII digits
356
- [],
357
- )
358
-
359
- export const Space = new RangeTable(
360
- [new Range16(0x0009, 0x000d, 1), new Range16(0x0020, 0x0020, 1)], // Basic whitespace
361
- [],
362
- )
363
-
364
- export const Upper = new RangeTable(
365
- [new Range16(0x0041, 0x005a, 1)], // ASCII uppercase
366
- [],
367
- )
368
-
369
- export const Lower = new RangeTable(
370
- [new Range16(0x0061, 0x007a, 1)], // ASCII lowercase
371
- [],
372
- )
373
-
374
- export const Title = new RangeTable([], [])
375
-
376
- export const Punct = new RangeTable(
377
- [
378
- new Range16(0x0021, 0x002f, 1), // !"#$%&'()*+,-./
379
- new Range16(0x003a, 0x0040, 1), // :;<=>?@
380
- new Range16(0x005b, 0x0060, 1), // [\]^_`
381
- new Range16(0x007b, 0x007e, 1), // {|}~
382
- ],
383
- [],
384
- )
385
-
386
- export const Symbol = new RangeTable([], [])
387
-
388
- export const Mark = new RangeTable([], [])
389
-
390
- export const Number = new RangeTable(
391
- [new Range16(0x0030, 0x0039, 1)], // ASCII digits
392
- [],
393
- )
394
-
395
- // Categories map
396
- export const Categories = new Map<string, RangeTable>([
397
- ['L', Letter],
398
- ['Ll', Lower],
399
- ['Lu', Upper],
400
- ['Lt', Title],
401
- ['M', Mark],
402
- ['N', Number],
403
- ['Nd', Digit],
404
- ['P', Punct],
405
- ['S', Symbol],
406
- ['Z', Space],
407
- ])
584
+ // IsTitle reports whether the rune is a title case letter.
585
+ export function IsTitle(r: number): boolean {
586
+ if (isLatin1(r)) {
587
+ return (latin1Props[r] & L1_TITLE) !== 0
588
+ }
589
+ return Is(Title, r)
590
+ }
408
591
 
409
- export const CategoryAliases = new Map<string, string>([
410
- ['C', 'C'],
411
- ['Cc', 'Cc'],
412
- ['cntrl', 'Cc'],
413
- ['Cf', 'Cf'],
414
- ['Co', 'Co'],
415
- ['Cs', 'Cs'],
416
- ['L', 'L'],
417
- ['LC', 'LC'],
418
- ['Ll', 'Ll'],
419
- ['Lm', 'Lm'],
420
- ['Lo', 'Lo'],
421
- ['Lt', 'Lt'],
422
- ['Lu', 'Lu'],
423
- ['M', 'M'],
424
- ['Mc', 'Mc'],
425
- ['Me', 'Me'],
426
- ['Mn', 'Mn'],
427
- ['N', 'N'],
428
- ['Nd', 'Nd'],
429
- ['digit', 'Nd'],
430
- ['Nl', 'Nl'],
431
- ['No', 'No'],
432
- ['P', 'P'],
433
- ['Pc', 'Pc'],
434
- ['Pd', 'Pd'],
435
- ['Pe', 'Pe'],
436
- ['Pf', 'Pf'],
437
- ['Pi', 'Pi'],
438
- ['Po', 'Po'],
439
- ['Ps', 'Ps'],
440
- ['S', 'S'],
441
- ['Sc', 'Sc'],
442
- ['Sk', 'Sk'],
443
- ['Sm', 'Sm'],
444
- ['So', 'So'],
445
- ['Z', 'Z'],
446
- ['Zl', 'Zl'],
447
- ['Zp', 'Zp'],
448
- ['Zs', 'Zs'],
449
- ])
592
+ // IsGraphic reports whether the rune is defined as a Graphic by Unicode: letters,
593
+ // marks, numbers, punctuation, symbols, and spaces (categories L, M, N, P, S, Zs).
594
+ export function IsGraphic(r: number): boolean {
595
+ if (isLatin1(r)) {
596
+ return (latin1Props[r] & L1_GRAPHIC) !== 0
597
+ }
598
+ return In(r, ...GraphicRanges)
599
+ }
450
600
 
451
- // Scripts and Properties maps (simplified)
452
- export const Scripts = new Map<string, RangeTable>()
453
- export const Properties = new Map<string, RangeTable>()
454
- export const FoldCategory = new Map<string, RangeTable>()
455
- export const FoldScript = new Map<string, RangeTable>()
456
-
457
- // Graphic ranges
458
- export const GraphicRanges = [Letter, Mark, Number, Punct, Symbol]
459
-
460
- // Print ranges
461
- export const PrintRanges = [Letter, Mark, Number, Punct, Symbol, Space]
462
-
463
- // Case ranges (simplified)
464
- export const CaseRanges: CaseRange[] = []
465
-
466
- // Special cases
467
- export const TurkishCase: SpecialCase = []
468
- export const AzeriCase: SpecialCase = TurkishCase
469
-
470
- // Predefined character categories (simplified implementations)
471
- export const Cc = new RangeTable(
472
- [new Range16(0x0000, 0x001f, 1), new Range16(0x007f, 0x009f, 1)],
473
- [],
474
- )
475
- export const Cf = new RangeTable([], [])
476
- export const Cn = new RangeTable([], [])
477
- export const Co = new RangeTable([], [])
478
- export const Cs = new RangeTable([new Range16(0xd800, 0xdfff, 1)], [])
479
- export const Lm = new RangeTable([], [])
480
- export const Lo = new RangeTable([], [])
481
- export const Mc = new RangeTable([], [])
482
- export const Me = new RangeTable([], [])
483
- export const Mn = new RangeTable([], [])
484
- export const Nl = new RangeTable([], [])
485
- export const No = new RangeTable([], [])
486
- export const Pc = new RangeTable([new Range16(0x005f, 0x005f, 1)], []) // underscore
487
- export const Pd = new RangeTable([new Range16(0x002d, 0x002d, 1)], []) // hyphen
488
- export const Pe = new RangeTable(
489
- [
490
- new Range16(0x0029, 0x0029, 1),
491
- new Range16(0x005d, 0x005d, 1),
492
- new Range16(0x007d, 0x007d, 1),
493
- ],
494
- [],
495
- )
496
- export const Pf = new RangeTable([], [])
497
- export const Pi = new RangeTable([], [])
498
- export const Po = new RangeTable(
499
- [new Range16(0x0021, 0x0023, 1), new Range16(0x0025, 0x0027, 1)],
500
- [],
501
- )
502
- export const Ps = new RangeTable(
503
- [
504
- new Range16(0x0028, 0x0028, 1),
505
- new Range16(0x005b, 0x005b, 1),
506
- new Range16(0x007b, 0x007b, 1),
507
- ],
508
- [],
509
- )
510
- export const Sc = new RangeTable([new Range16(0x0024, 0x0024, 1)], []) // dollar sign
511
- export const Sk = new RangeTable(
512
- [new Range16(0x005e, 0x005e, 1), new Range16(0x0060, 0x0060, 1)],
513
- [],
514
- )
515
- export const Sm = new RangeTable(
516
- [new Range16(0x002b, 0x002b, 1), new Range16(0x003c, 0x003e, 1)],
517
- [],
518
- )
519
- export const So = new RangeTable([], [])
520
- export const Zl = new RangeTable([], [])
521
- export const Zp = new RangeTable([], [])
522
- export const Zs = new RangeTable([new Range16(0x0020, 0x0020, 1)], []) // space
523
-
524
- Categories.set('Cn', Cn)
601
+ // IsPrint reports whether the rune is defined as printable by Go: the Graphic
602
+ // characters minus the non-ASCII spaces (so only ASCII space is printable).
603
+ export function IsPrint(r: number): boolean {
604
+ if (isLatin1(r)) {
605
+ return (latin1Props[r] & L1_PRINT) !== 0
606
+ }
607
+ return In(r, ...PrintRanges)
608
+ }