@herb-tools/node 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/CHANGELOG.md +3 -0
  2. package/README.md +9 -0
  3. package/binding.gyp +98 -0
  4. package/dist/herb-node.cjs +20 -0
  5. package/dist/herb-node.cjs.map +1 -0
  6. package/dist/herb-node.esm.js +33 -0
  7. package/dist/herb-node.esm.js.map +1 -0
  8. package/dist/types/index-cjs.d.cts +1 -0
  9. package/dist/types/index-esm.d.mts +8 -0
  10. package/dist/types/node-backend.d.ts +4 -0
  11. package/dist/types/util.d.ts +2 -0
  12. package/extension/error_helpers.cpp +340 -0
  13. package/extension/error_helpers.h +26 -0
  14. package/extension/extension_helpers.cpp +206 -0
  15. package/extension/extension_helpers.h +22 -0
  16. package/extension/herb.cpp +232 -0
  17. package/extension/libherb/analyze.c +989 -0
  18. package/extension/libherb/analyze_helpers.c +241 -0
  19. package/extension/libherb/analyzed_ruby.c +35 -0
  20. package/extension/libherb/array.c +137 -0
  21. package/extension/libherb/ast_node.c +81 -0
  22. package/extension/libherb/ast_nodes.c +866 -0
  23. package/extension/libherb/ast_pretty_print.c +588 -0
  24. package/extension/libherb/buffer.c +199 -0
  25. package/extension/libherb/errors.c +740 -0
  26. package/extension/libherb/extract.c +110 -0
  27. package/extension/libherb/herb.c +103 -0
  28. package/extension/libherb/html_util.c +143 -0
  29. package/extension/libherb/include/analyze.h +36 -0
  30. package/extension/libherb/include/analyze_helpers.h +43 -0
  31. package/extension/libherb/include/analyzed_ruby.h +33 -0
  32. package/extension/libherb/include/array.h +33 -0
  33. package/extension/libherb/include/ast_node.h +35 -0
  34. package/extension/libherb/include/ast_nodes.h +303 -0
  35. package/extension/libherb/include/ast_pretty_print.h +17 -0
  36. package/extension/libherb/include/buffer.h +36 -0
  37. package/extension/libherb/include/errors.h +125 -0
  38. package/extension/libherb/include/extract.h +20 -0
  39. package/extension/libherb/include/herb.h +32 -0
  40. package/extension/libherb/include/html_util.h +13 -0
  41. package/extension/libherb/include/io.h +9 -0
  42. package/extension/libherb/include/json.h +28 -0
  43. package/extension/libherb/include/lexer.h +13 -0
  44. package/extension/libherb/include/lexer_peek_helpers.h +23 -0
  45. package/extension/libherb/include/lexer_struct.h +32 -0
  46. package/extension/libherb/include/location.h +25 -0
  47. package/extension/libherb/include/macros.h +10 -0
  48. package/extension/libherb/include/memory.h +12 -0
  49. package/extension/libherb/include/parser.h +22 -0
  50. package/extension/libherb/include/parser_helpers.h +33 -0
  51. package/extension/libherb/include/position.h +22 -0
  52. package/extension/libherb/include/pretty_print.h +53 -0
  53. package/extension/libherb/include/prism_helpers.h +18 -0
  54. package/extension/libherb/include/range.h +23 -0
  55. package/extension/libherb/include/ruby_parser.h +6 -0
  56. package/extension/libherb/include/token.h +25 -0
  57. package/extension/libherb/include/token_matchers.h +21 -0
  58. package/extension/libherb/include/token_struct.h +51 -0
  59. package/extension/libherb/include/util.h +25 -0
  60. package/extension/libherb/include/version.h +6 -0
  61. package/extension/libherb/include/visitor.h +11 -0
  62. package/extension/libherb/io.c +30 -0
  63. package/extension/libherb/json.c +205 -0
  64. package/extension/libherb/lexer.c +284 -0
  65. package/extension/libherb/lexer_peek_helpers.c +59 -0
  66. package/extension/libherb/location.c +41 -0
  67. package/extension/libherb/main.c +162 -0
  68. package/extension/libherb/memory.c +53 -0
  69. package/extension/libherb/parser.c +704 -0
  70. package/extension/libherb/parser_helpers.c +161 -0
  71. package/extension/libherb/position.c +33 -0
  72. package/extension/libherb/pretty_print.c +242 -0
  73. package/extension/libherb/prism_helpers.c +50 -0
  74. package/extension/libherb/range.c +38 -0
  75. package/extension/libherb/ruby_parser.c +47 -0
  76. package/extension/libherb/token.c +194 -0
  77. package/extension/libherb/token_matchers.c +32 -0
  78. package/extension/libherb/util.c +128 -0
  79. package/extension/libherb/visitor.c +321 -0
  80. package/extension/nodes.cpp +1060 -0
  81. package/extension/nodes.h +44 -0
  82. package/extension/prism/include/prism/ast.h +7964 -0
  83. package/extension/prism/include/prism/defines.h +260 -0
  84. package/extension/prism/include/prism/diagnostic.h +451 -0
  85. package/extension/prism/include/prism/encoding.h +283 -0
  86. package/extension/prism/include/prism/node.h +129 -0
  87. package/extension/prism/include/prism/options.h +442 -0
  88. package/extension/prism/include/prism/pack.h +163 -0
  89. package/extension/prism/include/prism/parser.h +933 -0
  90. package/extension/prism/include/prism/prettyprint.h +34 -0
  91. package/extension/prism/include/prism/regexp.h +43 -0
  92. package/extension/prism/include/prism/static_literals.h +121 -0
  93. package/extension/prism/include/prism/util/pm_buffer.h +228 -0
  94. package/extension/prism/include/prism/util/pm_char.h +204 -0
  95. package/extension/prism/include/prism/util/pm_constant_pool.h +218 -0
  96. package/extension/prism/include/prism/util/pm_integer.h +126 -0
  97. package/extension/prism/include/prism/util/pm_list.h +97 -0
  98. package/extension/prism/include/prism/util/pm_memchr.h +29 -0
  99. package/extension/prism/include/prism/util/pm_newline_list.h +113 -0
  100. package/extension/prism/include/prism/util/pm_string.h +190 -0
  101. package/extension/prism/include/prism/util/pm_strncasecmp.h +32 -0
  102. package/extension/prism/include/prism/util/pm_strpbrk.h +46 -0
  103. package/extension/prism/include/prism/version.h +29 -0
  104. package/extension/prism/include/prism.h +383 -0
  105. package/extension/prism/src/diagnostic.c +842 -0
  106. package/extension/prism/src/encoding.c +5235 -0
  107. package/extension/prism/src/node.c +8674 -0
  108. package/extension/prism/src/options.c +328 -0
  109. package/extension/prism/src/pack.c +509 -0
  110. package/extension/prism/src/prettyprint.c +8939 -0
  111. package/extension/prism/src/prism.c +23184 -0
  112. package/extension/prism/src/regexp.c +790 -0
  113. package/extension/prism/src/serialize.c +2266 -0
  114. package/extension/prism/src/static_literals.c +617 -0
  115. package/extension/prism/src/token_type.c +701 -0
  116. package/extension/prism/src/util/pm_buffer.c +357 -0
  117. package/extension/prism/src/util/pm_char.c +318 -0
  118. package/extension/prism/src/util/pm_constant_pool.c +342 -0
  119. package/extension/prism/src/util/pm_integer.c +670 -0
  120. package/extension/prism/src/util/pm_list.c +49 -0
  121. package/extension/prism/src/util/pm_memchr.c +35 -0
  122. package/extension/prism/src/util/pm_newline_list.c +125 -0
  123. package/extension/prism/src/util/pm_string.c +383 -0
  124. package/extension/prism/src/util/pm_strncasecmp.c +36 -0
  125. package/extension/prism/src/util/pm_strpbrk.c +206 -0
  126. package/package.json +74 -0
  127. package/src/index-cjs.cts +22 -0
  128. package/src/index-esm.mts +28 -0
  129. package/src/node-backend.ts +9 -0
  130. package/src/types/global.d.ts +18 -0
  131. package/src/util.ts +35 -0
@@ -0,0 +1,206 @@
1
+ #include "prism/util/pm_strpbrk.h"
2
+
3
+ /**
4
+ * Add an invalid multibyte character error to the parser.
5
+ */
6
+ static inline void
7
+ pm_strpbrk_invalid_multibyte_character(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
8
+ pm_diagnostic_list_append_format(&parser->error_list, start, end, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *start);
9
+ }
10
+
11
+ /**
12
+ * Set the explicit encoding for the parser to the current encoding.
13
+ */
14
+ static inline void
15
+ pm_strpbrk_explicit_encoding_set(pm_parser_t *parser, const uint8_t *source, size_t width) {
16
+ if (parser->explicit_encoding != NULL) {
17
+ if (parser->explicit_encoding == parser->encoding) {
18
+ // Okay, we already locked to this encoding.
19
+ } else if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
20
+ // Not okay, we already found a Unicode escape sequence and this
21
+ // conflicts.
22
+ pm_diagnostic_list_append_format(&parser->error_list, source, source + width, PM_ERR_MIXED_ENCODING, parser->encoding->name);
23
+ } else {
24
+ // Should not be anything else.
25
+ assert(false && "unreachable");
26
+ }
27
+ }
28
+
29
+ parser->explicit_encoding = parser->encoding;
30
+ }
31
+
32
+ /**
33
+ * This is the default path.
34
+ */
35
+ static inline const uint8_t *
36
+ pm_strpbrk_utf8(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
37
+ size_t index = 0;
38
+
39
+ while (index < maximum) {
40
+ if (strchr((const char *) charset, source[index]) != NULL) {
41
+ return source + index;
42
+ }
43
+
44
+ if (source[index] < 0x80) {
45
+ index++;
46
+ } else {
47
+ size_t width = pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index));
48
+
49
+ if (width > 0) {
50
+ index += width;
51
+ } else if (!validate) {
52
+ index++;
53
+ } else {
54
+ // At this point we know we have an invalid multibyte character.
55
+ // We'll walk forward as far as we can until we find the next
56
+ // valid character so that we don't spam the user with a ton of
57
+ // the same kind of error.
58
+ const size_t start = index;
59
+
60
+ do {
61
+ index++;
62
+ } while (index < maximum && pm_encoding_utf_8_char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
63
+
64
+ pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
65
+ }
66
+ }
67
+ }
68
+
69
+ return NULL;
70
+ }
71
+
72
+ /**
73
+ * This is the path when the encoding is ASCII-8BIT.
74
+ */
75
+ static inline const uint8_t *
76
+ pm_strpbrk_ascii_8bit(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
77
+ size_t index = 0;
78
+
79
+ while (index < maximum) {
80
+ if (strchr((const char *) charset, source[index]) != NULL) {
81
+ return source + index;
82
+ }
83
+
84
+ if (validate && source[index] >= 0x80) pm_strpbrk_explicit_encoding_set(parser, source, 1);
85
+ index++;
86
+ }
87
+
88
+ return NULL;
89
+ }
90
+
91
+ /**
92
+ * This is the slow path that does care about the encoding.
93
+ */
94
+ static inline const uint8_t *
95
+ pm_strpbrk_multi_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
96
+ size_t index = 0;
97
+ const pm_encoding_t *encoding = parser->encoding;
98
+
99
+ while (index < maximum) {
100
+ if (strchr((const char *) charset, source[index]) != NULL) {
101
+ return source + index;
102
+ }
103
+
104
+ if (source[index] < 0x80) {
105
+ index++;
106
+ } else {
107
+ size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
108
+ if (validate) pm_strpbrk_explicit_encoding_set(parser, source, width);
109
+
110
+ if (width > 0) {
111
+ index += width;
112
+ } else if (!validate) {
113
+ index++;
114
+ } else {
115
+ // At this point we know we have an invalid multibyte character.
116
+ // We'll walk forward as far as we can until we find the next
117
+ // valid character so that we don't spam the user with a ton of
118
+ // the same kind of error.
119
+ const size_t start = index;
120
+
121
+ do {
122
+ index++;
123
+ } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
124
+
125
+ pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
126
+ }
127
+ }
128
+ }
129
+
130
+ return NULL;
131
+ }
132
+
133
+ /**
134
+ * This is the fast path that does not care about the encoding because we know
135
+ * the encoding only supports single-byte characters.
136
+ */
137
+ static inline const uint8_t *
138
+ pm_strpbrk_single_byte(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, size_t maximum, bool validate) {
139
+ size_t index = 0;
140
+ const pm_encoding_t *encoding = parser->encoding;
141
+
142
+ while (index < maximum) {
143
+ if (strchr((const char *) charset, source[index]) != NULL) {
144
+ return source + index;
145
+ }
146
+
147
+ if (source[index] < 0x80 || !validate) {
148
+ index++;
149
+ } else {
150
+ size_t width = encoding->char_width(source + index, (ptrdiff_t) (maximum - index));
151
+ pm_strpbrk_explicit_encoding_set(parser, source, width);
152
+
153
+ if (width > 0) {
154
+ index += width;
155
+ } else {
156
+ // At this point we know we have an invalid multibyte character.
157
+ // We'll walk forward as far as we can until we find the next
158
+ // valid character so that we don't spam the user with a ton of
159
+ // the same kind of error.
160
+ const size_t start = index;
161
+
162
+ do {
163
+ index++;
164
+ } while (index < maximum && encoding->char_width(source + index, (ptrdiff_t) (maximum - index)) == 0);
165
+
166
+ pm_strpbrk_invalid_multibyte_character(parser, source + start, source + index);
167
+ }
168
+ }
169
+ }
170
+
171
+ return NULL;
172
+ }
173
+
174
+ /**
175
+ * Here we have rolled our own version of strpbrk. The standard library strpbrk
176
+ * has undefined behavior when the source string is not null-terminated. We want
177
+ * to support strings that are not null-terminated because pm_parse does not
178
+ * have the contract that the string is null-terminated. (This is desirable
179
+ * because it means the extension can call pm_parse with the result of a call to
180
+ * mmap).
181
+ *
182
+ * The standard library strpbrk also does not support passing a maximum length
183
+ * to search. We want to support this for the reason mentioned above, but we
184
+ * also don't want it to stop on null bytes. Ruby actually allows null bytes
185
+ * within strings, comments, regular expressions, etc. So we need to be able to
186
+ * skip past them.
187
+ *
188
+ * Finally, we want to support encodings wherein the charset could contain
189
+ * characters that are trailing bytes of multi-byte characters. For example, in
190
+ * Shift_JIS, the backslash character can be a trailing byte. In that case we
191
+ * need to take a slower path and iterate one multi-byte character at a time.
192
+ */
193
+ const uint8_t *
194
+ pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate) {
195
+ if (length <= 0) {
196
+ return NULL;
197
+ } else if (!parser->encoding_changed) {
198
+ return pm_strpbrk_utf8(parser, source, charset, (size_t) length, validate);
199
+ } else if (parser->encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
200
+ return pm_strpbrk_ascii_8bit(parser, source, charset, (size_t) length, validate);
201
+ } else if (parser->encoding->multibyte) {
202
+ return pm_strpbrk_multi_byte(parser, source, charset, (size_t) length, validate);
203
+ } else {
204
+ return pm_strpbrk_single_byte(parser, source, charset, (size_t) length, validate);
205
+ }
206
+ }
package/package.json ADDED
@@ -0,0 +1,74 @@
1
+ {
2
+ "name": "@herb-tools/node",
3
+ "version": "0.1.0",
4
+ "type": "module",
5
+ "license": "MIT",
6
+ "homepage": "https://herb-tools.dev",
7
+ "bugs": "https://github.com/marcoroth/herb/issues/new?title=Package%20%60@herb-tools/node%60:%20",
8
+ "repository": {
9
+ "type": "git",
10
+ "url": "https://github.com/marcoroth/herb.git",
11
+ "directory": "javascript/packages/node"
12
+ },
13
+ "gypfile": true,
14
+ "main": "./dist/herb-node.cjs",
15
+ "module": "./dist/herb-node.esm.js",
16
+ "types": "./dist/types/index-esm.d.mts",
17
+ "exports": {
18
+ "./package.json": "./package.json",
19
+ ".": {
20
+ "types": "./dist/types/index-esm.d.mts",
21
+ "import": "./dist/herb-node.esm.js",
22
+ "require": "./dist/herb-node.cjs",
23
+ "default": "./dist/herb-node.esm.js"
24
+ }
25
+ },
26
+ "scripts": {
27
+ "install": "yarn vendor:libherb && yarn vendor:prism && node-pre-gyp install --fallback-to-build",
28
+ "dev": "rollup -c -w",
29
+ "build": "yarn rebuild",
30
+ "build:javascript": "rollup -c",
31
+ "vendor:libherb": "yarn clean && rake templates && cp -r ../../../src ./extension/libherb",
32
+ "vendor:prism": "node ./bin/vendor-prism.cjs",
33
+ "rebuild": "yarn clean && yarn vendor:libherb && yarn vendor:prism && node-pre-gyp rebuild && yarn build:javascript",
34
+ "configure": "node-pre-gyp configure",
35
+ "package": "node-pre-gyp package",
36
+ "clean": "node-pre-gyp clean && rimraf dist/ && rimraf extension/libherb/ && rimraf build/",
37
+ "test": "vitest run",
38
+ "test:watch": "vitest --watch",
39
+ "prepublishOnly": "yarn clean && yarn build && yarn test"
40
+ },
41
+ "binary": {
42
+ "module_name": "herb",
43
+ "module_path": "./build/{configuration}/{node_abi}-{platform}-{arch}/",
44
+ "remote_path": "{version}",
45
+ "package_name": "{module_name}-v{version}-{node_abi}-{platform}-{arch}.tar.gz",
46
+ "host": "https://github.com/marcoroth/herb/releases/download/"
47
+ },
48
+ "dependencies": {
49
+ "@herb-tools/core": "0.1.0",
50
+ "@mapbox/node-pre-gyp": "^2.0.0"
51
+ },
52
+ "devDependencies": {
53
+ "@rollup/plugin-json": "^6.1.0",
54
+ "@rollup/plugin-node-resolve": "^16.0.1",
55
+ "@rollup/plugin-typescript": "^12.1.2",
56
+ "node-addon-api": "^5.1.0",
57
+ "node-pre-gyp-github": "^2.0.0",
58
+ "rimraf": "^6.0.1",
59
+ "rollup": "^4.35.0",
60
+ "tslib": "^2.8.1",
61
+ "typescript": "^5.8.2",
62
+ "vitest": "^3.0.0"
63
+ },
64
+ "files": [
65
+ "package.json",
66
+ "README.md",
67
+ "binding.gyp",
68
+ "dist/",
69
+ "src/",
70
+ "extension/**/*.c",
71
+ "extension/**/*.cpp",
72
+ "extension/**/*.h"
73
+ ]
74
+ }
@@ -0,0 +1,22 @@
1
+ const path = require("path")
2
+ const binary = require("@mapbox/node-pre-gyp")
3
+
4
+ const { Visitor } = require("@herb-tools/core")
5
+ const { HerbBackendNode } = require("./node-backend.js")
6
+
7
+ const packagePath = path.resolve(__dirname, "../package.json")
8
+ const libherbPath = binary.find(packagePath)
9
+ const libHerbBinary = require(libherbPath)
10
+
11
+ /**
12
+ * An instance of the `Herb` class using a Node.js backend.
13
+ * This loads `libherb` in a Node.js C++ native extension.
14
+ */
15
+ const Herb = new HerbBackendNode(
16
+ new Promise((resolve, _reject) => resolve(libHerbBinary)),
17
+ )
18
+
19
+ module.exports = {
20
+ Herb: Herb,
21
+ Visitor: Visitor,
22
+ }
@@ -0,0 +1,28 @@
1
+ export * from "@herb-tools/core"
2
+
3
+ import path from "path"
4
+ import binary from "@mapbox/node-pre-gyp"
5
+
6
+ import { createRequire } from "module"
7
+ import { fileURLToPath } from "url"
8
+
9
import { HerbBackendNode } from "./node-backend.js"

// ES modules have no built-in `require` or `__dirname`; derive both from this
// module's own URL.
const require = createRequire(import.meta.url)
const __dirname = path.dirname(fileURLToPath(import.meta.url))

// Resolve the native binary's location through node-pre-gyp's `find`, using
// the package.json one directory up, and load it.
const libHerbBinary = require(
  binary.find(path.resolve(__dirname, "../package.json")),
)

/**
 * The `Herb` instance backed by the Node.js C++ native extension that
 * wraps `libherb`. The already-loaded binary is handed to the backend
 * through a promise-returning factory.
 */
const Herb = new HerbBackendNode(() => Promise.resolve(libHerbBinary))

export { Herb, HerbBackendNode }
@@ -0,0 +1,9 @@
1
+ import packageJSON from "../package.json" with { type: "json" }
2
+
3
+ import { HerbBackend } from "@herb-tools/core"
4
+
5
/**
 * Herb backend for Node.js. Identifies itself using the `name` and
 * `version` fields of this package's `package.json`.
 */
export class HerbBackendNode extends HerbBackend {
  /** Returns the backend identifier in the form `<name>@<version>`. */
  backendVersion(): string {
    const { name, version } = packageJSON

    return `${name}@${version}`
  }
}
@@ -0,0 +1,18 @@
1
/**
 * Ambient declaration for `*.node` imports. The default export is left
 * untyped (`any`).
 */
declare module "*.node" {
  const nativeBinding: any
  export default nativeBinding
}
5
+
6
/**
 * Minimal ambient typings for `@mapbox/node-pre-gyp`, covering only the
 * `find` function declared here.
 */
declare module "@mapbox/node-pre-gyp" {
  /** Options accepted by `find`; keys other than `module_root` are untyped. */
  export interface FindOptions {
    module_root?: string
    [key: string]: any
  }

  export interface NodePreGyp {
    /**
     * Takes the path to a `package.json` and returns a string that the entry
     * points of this package pass to `require` to load the native binary.
     */
    find(packageJsonPath: string, opts?: FindOptions): string
  }

  // No `declare` modifier here: the enclosing `declare module` block is
  // already an ambient context, and repeating the modifier is error TS1038.
  const nodePreGyp: NodePreGyp
  export default nodePreGyp
}
package/src/util.ts ADDED
@@ -0,0 +1,35 @@
1
import path from "path"
import { stat as fileExists } from "node:fs/promises"
import { fileURLToPath } from "node:url"
import { ensureString } from "@herb-tools/core"
4
+
5
/**
 * Ensures that `object` is a string naming an existing regular file.
 *
 * @param object - Value expected to be a file path. It is first passed
 *   through `ensureString`; whatever that helper throws propagates unchanged.
 * @returns The path string once `stat` confirms it refers to a regular file.
 * @throws TypeError when the path cannot be stat'ed (for example because it
 *   does not exist) or when it exists but is not a regular file.
 */
export async function ensureFile(object: any): Promise<string> {
  const string = ensureString(object)

  let stats
  try {
    // `stat` rejects for a missing or inaccessible path rather than resolving
    // to a falsy value, so the failure has to be caught here.
    stats = await fileExists(string)
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error)
    throw new TypeError(`Argument must be a path to an existing file: ${reason}`)
  }

  // `stat` also succeeds for directories and other non-file entries.
  if (!stats.isFile()) {
    throw new TypeError(`Argument must be a path to an existing file: ${string}`)
  }

  return string
}
14
+
15
/**
 * Resolves `relativePath` against the directory that contains this module.
 *
 * The base directory is picked as follows:
 * - CommonJS: `__dirname`, when it is defined.
 * - ESM: the directory of `import.meta.url`.
 * - Fallback: `process.cwd()`, when the module URL cannot be converted to a
 *   file path.
 *
 * @param relativePath - Path to join onto the base directory.
 * @returns The joined path.
 */
export function resolvePath(relativePath: string): string {
  let basePath: string

  // Check if we're in an ESM or CJS context
  if (typeof __dirname !== "undefined") {
    // CommonJS environment
    basePath = __dirname
  } else {
    // ESM environment - `require` does not exist here, so `fileURLToPath`
    // comes from a static import instead of `require("url")`.
    // Kept in a try/catch for bundlers and environments that don't provide a
    // usable `import.meta.url`.
    try {
      basePath = path.dirname(fileURLToPath(import.meta.url))
    } catch {
      // Fallback for environments where neither is available
      basePath = process.cwd()
    }
  }

  return path.join(basePath, relativePath)
}