blusher 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/Cargo.lock +373 -0
  4. data/Cargo.toml +6 -0
  5. data/LICENSE +25 -0
  6. data/README.md +98 -0
  7. data/ext/blusher/Cargo.toml +18 -0
  8. data/ext/blusher/extconf.rb +10 -0
  9. data/ext/blusher/src/lib.rs +167 -0
  10. data/lib/blusher/native.rb +115 -0
  11. data/lib/blusher/shim.rb +134 -0
  12. data/lib/blusher/tables/abap.json +1 -0
  13. data/lib/blusher/tables/actionscript.json +1 -0
  14. data/lib/blusher/tables/ada.json +1 -0
  15. data/lib/blusher/tables/apache.json +1 -0
  16. data/lib/blusher/tables/apex.json +1 -0
  17. data/lib/blusher/tables/apiblueprint.json +1 -0
  18. data/lib/blusher/tables/applescript.json +1 -0
  19. data/lib/blusher/tables/armasm.json +1 -0
  20. data/lib/blusher/tables/augeas.json +1 -0
  21. data/lib/blusher/tables/awk.json +1 -0
  22. data/lib/blusher/tables/batchfile.json +1 -0
  23. data/lib/blusher/tables/bbcbasic.json +1 -0
  24. data/lib/blusher/tables/bibtex.json +1 -0
  25. data/lib/blusher/tables/bicep.json +1 -0
  26. data/lib/blusher/tables/biml.json +1 -0
  27. data/lib/blusher/tables/bpf.json +1 -0
  28. data/lib/blusher/tables/brainfuck.json +1 -0
  29. data/lib/blusher/tables/brightscript.json +1 -0
  30. data/lib/blusher/tables/bsl.json +1 -0
  31. data/lib/blusher/tables/c.json +1 -0
  32. data/lib/blusher/tables/cfscript.json +1 -0
  33. data/lib/blusher/tables/cisco_ios.json +1 -0
  34. data/lib/blusher/tables/clean.json +1 -0
  35. data/lib/blusher/tables/clojure.json +1 -0
  36. data/lib/blusher/tables/cmake.json +1 -0
  37. data/lib/blusher/tables/cmhg.json +1 -0
  38. data/lib/blusher/tables/cobol.json +1 -0
  39. data/lib/blusher/tables/codeowners.json +1 -0
  40. data/lib/blusher/tables/coffeescript.json +1 -0
  41. data/lib/blusher/tables/common_lisp.json +1 -0
  42. data/lib/blusher/tables/conf.json +1 -0
  43. data/lib/blusher/tables/cpp.json +1 -0
  44. data/lib/blusher/tables/crystal.json +1 -0
  45. data/lib/blusher/tables/csharp.json +1 -0
  46. data/lib/blusher/tables/css.json +1 -0
  47. data/lib/blusher/tables/csvs.json +1 -0
  48. data/lib/blusher/tables/cuda.json +1 -0
  49. data/lib/blusher/tables/cypher.json +1 -0
  50. data/lib/blusher/tables/cython.json +1 -0
  51. data/lib/blusher/tables/d.json +1 -0
  52. data/lib/blusher/tables/dafny.json +1 -0
  53. data/lib/blusher/tables/dart.json +1 -0
  54. data/lib/blusher/tables/datastudio.json +1 -0
  55. data/lib/blusher/tables/diff.json +1 -0
  56. data/lib/blusher/tables/digdag.json +1 -0
  57. data/lib/blusher/tables/docker.json +1 -0
  58. data/lib/blusher/tables/dot.json +1 -0
  59. data/lib/blusher/tables/dylan.json +1 -0
  60. data/lib/blusher/tables/ecl.json +1 -0
  61. data/lib/blusher/tables/eex.json +1 -0
  62. data/lib/blusher/tables/eiffel.json +1 -0
  63. data/lib/blusher/tables/elixir.json +1 -0
  64. data/lib/blusher/tables/elm.json +1 -0
  65. data/lib/blusher/tables/email.json +1 -0
  66. data/lib/blusher/tables/epp.json +1 -0
  67. data/lib/blusher/tables/erb.json +1 -0
  68. data/lib/blusher/tables/erlang.json +1 -0
  69. data/lib/blusher/tables/factor.json +1 -0
  70. data/lib/blusher/tables/fluent.json +1 -0
  71. data/lib/blusher/tables/fortran.json +1 -0
  72. data/lib/blusher/tables/freefem.json +1 -0
  73. data/lib/blusher/tables/fsharp.json +1 -0
  74. data/lib/blusher/tables/gdscript.json +1 -0
  75. data/lib/blusher/tables/ghc-cmm.json +1 -0
  76. data/lib/blusher/tables/ghc-core.json +1 -0
  77. data/lib/blusher/tables/gjs.json +1 -0
  78. data/lib/blusher/tables/glsl.json +1 -0
  79. data/lib/blusher/tables/go.json +1 -0
  80. data/lib/blusher/tables/gradle.json +1 -0
  81. data/lib/blusher/tables/graphql.json +1 -0
  82. data/lib/blusher/tables/groovy.json +1 -0
  83. data/lib/blusher/tables/gts.json +1 -0
  84. data/lib/blusher/tables/hack.json +1 -0
  85. data/lib/blusher/tables/haml.json +1 -0
  86. data/lib/blusher/tables/handlebars.json +1 -0
  87. data/lib/blusher/tables/haskell.json +1 -0
  88. data/lib/blusher/tables/haxe.json +1 -0
  89. data/lib/blusher/tables/hcl.json +1 -0
  90. data/lib/blusher/tables/hlsl.json +1 -0
  91. data/lib/blusher/tables/hocon.json +1 -0
  92. data/lib/blusher/tables/hql.json +1 -0
  93. data/lib/blusher/tables/html.json +1 -0
  94. data/lib/blusher/tables/http.json +1 -0
  95. data/lib/blusher/tables/hylang.json +1 -0
  96. data/lib/blusher/tables/idlang.json +1 -0
  97. data/lib/blusher/tables/idris.json +1 -0
  98. data/lib/blusher/tables/iecst.json +1 -0
  99. data/lib/blusher/tables/igorpro.json +1 -0
  100. data/lib/blusher/tables/ini.json +1 -0
  101. data/lib/blusher/tables/io.json +1 -0
  102. data/lib/blusher/tables/irb_output.json +1 -0
  103. data/lib/blusher/tables/isabelle.json +1 -0
  104. data/lib/blusher/tables/isbl.json +1 -0
  105. data/lib/blusher/tables/j.json +1 -0
  106. data/lib/blusher/tables/janet.json +1 -0
  107. data/lib/blusher/tables/java.json +1 -0
  108. data/lib/blusher/tables/javascript.json +1 -0
  109. data/lib/blusher/tables/jinja.json +1 -0
  110. data/lib/blusher/tables/jsl.json +1 -0
  111. data/lib/blusher/tables/json-doc.json +1 -0
  112. data/lib/blusher/tables/json.json +1 -0
  113. data/lib/blusher/tables/json5.json +1 -0
  114. data/lib/blusher/tables/jsonnet.json +1 -0
  115. data/lib/blusher/tables/jsp.json +1 -0
  116. data/lib/blusher/tables/jsx.json +1 -0
  117. data/lib/blusher/tables/julia.json +1 -0
  118. data/lib/blusher/tables/kick_assembler.json +1 -0
  119. data/lib/blusher/tables/kotlin.json +1 -0
  120. data/lib/blusher/tables/lasso.json +1 -0
  121. data/lib/blusher/tables/lean.json +1 -0
  122. data/lib/blusher/tables/liquid.json +1 -0
  123. data/lib/blusher/tables/literate_coffeescript.json +1 -0
  124. data/lib/blusher/tables/literate_haskell.json +1 -0
  125. data/lib/blusher/tables/livescript.json +1 -0
  126. data/lib/blusher/tables/llvm.json +1 -0
  127. data/lib/blusher/tables/lua.json +1 -0
  128. data/lib/blusher/tables/lustre.json +1 -0
  129. data/lib/blusher/tables/lutin.json +1 -0
  130. data/lib/blusher/tables/m68k.json +1 -0
  131. data/lib/blusher/tables/magik.json +1 -0
  132. data/lib/blusher/tables/make.json +1 -0
  133. data/lib/blusher/tables/markdown.json +1 -0
  134. data/lib/blusher/tables/mason.json +1 -0
  135. data/lib/blusher/tables/mathematica.json +1 -0
  136. data/lib/blusher/tables/matlab.json +1 -0
  137. data/lib/blusher/tables/meson.json +1 -0
  138. data/lib/blusher/tables/minizinc.json +1 -0
  139. data/lib/blusher/tables/mojo.json +1 -0
  140. data/lib/blusher/tables/moonscript.json +1 -0
  141. data/lib/blusher/tables/mosel.json +1 -0
  142. data/lib/blusher/tables/msgtrans.json +1 -0
  143. data/lib/blusher/tables/mxml.json +1 -0
  144. data/lib/blusher/tables/nasm.json +1 -0
  145. data/lib/blusher/tables/nesasm.json +1 -0
  146. data/lib/blusher/tables/nginx.json +1 -0
  147. data/lib/blusher/tables/nial.json +1 -0
  148. data/lib/blusher/tables/nim.json +1 -0
  149. data/lib/blusher/tables/nix.json +1 -0
  150. data/lib/blusher/tables/objective_c.json +1 -0
  151. data/lib/blusher/tables/objective_cpp.json +1 -0
  152. data/lib/blusher/tables/ocaml.json +1 -0
  153. data/lib/blusher/tables/ocl.json +1 -0
  154. data/lib/blusher/tables/openedge.json +1 -0
  155. data/lib/blusher/tables/opentype_feature_file.json +1 -0
  156. data/lib/blusher/tables/p4.json +1 -0
  157. data/lib/blusher/tables/pascal.json +1 -0
  158. data/lib/blusher/tables/pdf.json +1 -0
  159. data/lib/blusher/tables/perl.json +1 -0
  160. data/lib/blusher/tables/php.json +1 -0
  161. data/lib/blusher/tables/plist.json +1 -0
  162. data/lib/blusher/tables/plsql.json +1 -0
  163. data/lib/blusher/tables/pony.json +1 -0
  164. data/lib/blusher/tables/postscript.json +1 -0
  165. data/lib/blusher/tables/powershell.json +1 -0
  166. data/lib/blusher/tables/praat.json +1 -0
  167. data/lib/blusher/tables/prolog.json +1 -0
  168. data/lib/blusher/tables/prometheus.json +1 -0
  169. data/lib/blusher/tables/properties.json +1 -0
  170. data/lib/blusher/tables/protobuf.json +1 -0
  171. data/lib/blusher/tables/puppet.json +1 -0
  172. data/lib/blusher/tables/python.json +1 -0
  173. data/lib/blusher/tables/q.json +1 -0
  174. data/lib/blusher/tables/qml.json +1 -0
  175. data/lib/blusher/tables/r.json +1 -0
  176. data/lib/blusher/tables/racket.json +1 -0
  177. data/lib/blusher/tables/reasonml.json +1 -0
  178. data/lib/blusher/tables/rego.json +1 -0
  179. data/lib/blusher/tables/rescript.json +1 -0
  180. data/lib/blusher/tables/rml.json +1 -0
  181. data/lib/blusher/tables/robot_framework.json +1 -0
  182. data/lib/blusher/tables/rocq.json +1 -0
  183. data/lib/blusher/tables/ruby.json +1 -0
  184. data/lib/blusher/tables/rust.json +1 -0
  185. data/lib/blusher/tables/sas.json +1 -0
  186. data/lib/blusher/tables/sass.json +1 -0
  187. data/lib/blusher/tables/scala.json +1 -0
  188. data/lib/blusher/tables/scheme.json +1 -0
  189. data/lib/blusher/tables/scss.json +1 -0
  190. data/lib/blusher/tables/sed.json +1 -0
  191. data/lib/blusher/tables/shell.json +1 -0
  192. data/lib/blusher/tables/sieve.json +1 -0
  193. data/lib/blusher/tables/slice.json +1 -0
  194. data/lib/blusher/tables/slim.json +1 -0
  195. data/lib/blusher/tables/smalltalk.json +1 -0
  196. data/lib/blusher/tables/smarty.json +1 -0
  197. data/lib/blusher/tables/sml.json +1 -0
  198. data/lib/blusher/tables/sparql.json +1 -0
  199. data/lib/blusher/tables/sqf.json +1 -0
  200. data/lib/blusher/tables/sql.json +1 -0
  201. data/lib/blusher/tables/ssh.json +1 -0
  202. data/lib/blusher/tables/stan.json +1 -0
  203. data/lib/blusher/tables/stata.json +1 -0
  204. data/lib/blusher/tables/supercollider.json +1 -0
  205. data/lib/blusher/tables/svelte.json +1 -0
  206. data/lib/blusher/tables/swift.json +1 -0
  207. data/lib/blusher/tables/systemd.json +1 -0
  208. data/lib/blusher/tables/syzlang.json +1 -0
  209. data/lib/blusher/tables/syzprog.json +1 -0
  210. data/lib/blusher/tables/tap.json +1 -0
  211. data/lib/blusher/tables/tcl.json +1 -0
  212. data/lib/blusher/tables/terraform.json +1 -0
  213. data/lib/blusher/tables/tex.json +1 -0
  214. data/lib/blusher/tables/thrift.json +1 -0
  215. data/lib/blusher/tables/toml.json +1 -0
  216. data/lib/blusher/tables/tsx.json +1 -0
  217. data/lib/blusher/tables/ttcn3.json +1 -0
  218. data/lib/blusher/tables/tulip.json +1 -0
  219. data/lib/blusher/tables/turtle.json +1 -0
  220. data/lib/blusher/tables/twig.json +1 -0
  221. data/lib/blusher/tables/typescript.json +1 -0
  222. data/lib/blusher/tables/vala.json +1 -0
  223. data/lib/blusher/tables/vb.json +1 -0
  224. data/lib/blusher/tables/vcl.json +1 -0
  225. data/lib/blusher/tables/velocity.json +1 -0
  226. data/lib/blusher/tables/verilog.json +1 -0
  227. data/lib/blusher/tables/veryl.json +1 -0
  228. data/lib/blusher/tables/vhdl.json +1 -0
  229. data/lib/blusher/tables/viml.json +1 -0
  230. data/lib/blusher/tables/vue.json +1 -0
  231. data/lib/blusher/tables/wollok.json +1 -0
  232. data/lib/blusher/tables/xml.json +1 -0
  233. data/lib/blusher/tables/xojo.json +1 -0
  234. data/lib/blusher/tables/xpath.json +1 -0
  235. data/lib/blusher/tables/xquery.json +1 -0
  236. data/lib/blusher/tables/yaml.json +1 -0
  237. data/lib/blusher/tables/yang.json +1 -0
  238. data/lib/blusher/tables/zig.json +1 -0
  239. data/lib/blusher/version.rb +5 -0
  240. data/lib/blusher.rb +18 -0
  241. metadata +320 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: d4bf0252e2f945302dbf33bdeb00d4f3dad51c25be02a8ff2996afbcec745cc0
4
+ data.tar.gz: 03e7eef5f6093112f7d4b37a5d1647017c83b72560740a53d8f5d6365492a504
5
+ SHA512:
6
+ metadata.gz: 676f07b45c6dfe42b28cb5b476bb9d179b238de517106e73e2c47afbe87a7fe1c63abc84a31f9cd8401de673b0542491ec441879bfbde7a46bfcb85aea5bd1a1
7
+ data.tar.gz: c3a1cc14b37d479369af5ac790c76b93c2c233c482a3463d2eb29ce1920e46745f4190158a30c8d08261337566bdc03a110c6115428133e61a4d61adb03ccafc
data/CHANGELOG.md ADDED
@@ -0,0 +1,29 @@
1
+ # Changelog
2
+
3
+ ## 0.1.0
4
+
5
+ Initial release.
6
+
7
+ - Drop-in alternative lexing backend for rouge: `require "blusher"` routes
8
+ `Rouge::RegexLexer#lex` through the Rust `carmine` engine, with byte-identical
9
+ output or transparent fallback to rouge.
10
+ - Native engine via an rb-sys/magnus extension (builds the `[Token, value]`
11
+ pair array directly as Ruby objects); a Fiddle-loaded `carmine-ffi` cdylib
12
+ is kept as a dependency-light fallback.
13
+ - Callback-free **routability allowlist**: lexers whose tables contain rouge
14
+ `proc` rules carmine can't execute are skipped up front, so they never pay a
15
+ wasted native-lex attempt.
16
+ - **Fused lex+format HTML path**: for an unadorned `Rouge::Formatters::HTML`
17
+ pipeline (`Rouge.highlight`/Jekyll/kramdown default), `lex` returns a deferred
18
+ token stream and the patched `HTML#format` lexes AND formats in Rust, returning
19
+ one String — crossing the Ruby boundary once instead of per token. Other
20
+ formatters, the block form, and direct token consumers fall back to rouge.
21
+ - Thread-local cache of parsed/compiled tables (the ~0.5 ms build no longer
22
+ repeats per call).
23
+ - Correctness: rouge v5.0.0 full lexer spec suite — 757 runs, 5130 assertions,
24
+ 0 failures; fused HTML output byte-identical across all 126 routable visual
25
+ samples + rouge's HTML formatter specs.
26
+ - Performance: ~1.7× faster highlighting to HTML on a mixed real corpus
27
+ (2.5–2.7× on individual files), parity for callback lexers / non-HTML output.
28
+ (A token stream alone is only ~1.0×: Ruby object allocation at the boundary
29
+ dominates and both engines pay it — fusing the HTML output is what wins.)
data/Cargo.lock ADDED
@@ -0,0 +1,373 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 4
4
+
5
+ [[package]]
6
+ name = "aho-corasick"
7
+ version = "1.1.4"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
10
+ dependencies = [
11
+ "memchr",
12
+ ]
13
+
14
+ [[package]]
15
+ name = "bindgen"
16
+ version = "0.72.1"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
19
+ dependencies = [
20
+ "bitflags",
21
+ "cexpr",
22
+ "clang-sys",
23
+ "itertools",
24
+ "proc-macro2",
25
+ "quote",
26
+ "regex",
27
+ "rustc-hash",
28
+ "shlex",
29
+ "syn",
30
+ ]
31
+
32
+ [[package]]
33
+ name = "bit-set"
34
+ version = "0.8.0"
35
+ source = "registry+https://github.com/rust-lang/crates.io-index"
36
+ checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
37
+ dependencies = [
38
+ "bit-vec",
39
+ ]
40
+
41
+ [[package]]
42
+ name = "bit-vec"
43
+ version = "0.8.0"
44
+ source = "registry+https://github.com/rust-lang/crates.io-index"
45
+ checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
46
+
47
+ [[package]]
48
+ name = "bitflags"
49
+ version = "2.13.0"
50
+ source = "registry+https://github.com/rust-lang/crates.io-index"
51
+ checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8"
52
+
53
+ [[package]]
54
+ name = "blusher"
55
+ version = "0.1.0"
56
+ dependencies = [
57
+ "carmine",
58
+ "magnus",
59
+ ]
60
+
61
+ [[package]]
62
+ name = "carmine"
63
+ version = "0.3.0"
64
+ source = "registry+https://github.com/rust-lang/crates.io-index"
65
+ checksum = "415476756c3ae81a94343572754fe6f11a5f5a7a1ead77956327e0afbc5e2934"
66
+ dependencies = [
67
+ "fancy-regex",
68
+ "regex-automata",
69
+ "serde_json",
70
+ ]
71
+
72
+ [[package]]
73
+ name = "cexpr"
74
+ version = "0.6.0"
75
+ source = "registry+https://github.com/rust-lang/crates.io-index"
76
+ checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
77
+ dependencies = [
78
+ "nom",
79
+ ]
80
+
81
+ [[package]]
82
+ name = "cfg-if"
83
+ version = "1.0.4"
84
+ source = "registry+https://github.com/rust-lang/crates.io-index"
85
+ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
86
+
87
+ [[package]]
88
+ name = "clang-sys"
89
+ version = "1.8.1"
90
+ source = "registry+https://github.com/rust-lang/crates.io-index"
91
+ checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
92
+ dependencies = [
93
+ "glob",
94
+ "libc",
95
+ "libloading",
96
+ ]
97
+
98
+ [[package]]
99
+ name = "either"
100
+ version = "1.16.0"
101
+ source = "registry+https://github.com/rust-lang/crates.io-index"
102
+ checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
103
+
104
+ [[package]]
105
+ name = "fancy-regex"
106
+ version = "0.18.0"
107
+ source = "registry+https://github.com/rust-lang/crates.io-index"
108
+ checksum = "e1e1dacd0d2082dfcf1351c4bdd566bbe89a2b263235a2b50058f1e130a47277"
109
+ dependencies = [
110
+ "bit-set",
111
+ "regex-automata",
112
+ "regex-syntax",
113
+ ]
114
+
115
+ [[package]]
116
+ name = "glob"
117
+ version = "0.3.3"
118
+ source = "registry+https://github.com/rust-lang/crates.io-index"
119
+ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
120
+
121
+ [[package]]
122
+ name = "itertools"
123
+ version = "0.13.0"
124
+ source = "registry+https://github.com/rust-lang/crates.io-index"
125
+ checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
126
+ dependencies = [
127
+ "either",
128
+ ]
129
+
130
+ [[package]]
131
+ name = "itoa"
132
+ version = "1.0.18"
133
+ source = "registry+https://github.com/rust-lang/crates.io-index"
134
+ checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
135
+
136
+ [[package]]
137
+ name = "lazy_static"
138
+ version = "1.5.0"
139
+ source = "registry+https://github.com/rust-lang/crates.io-index"
140
+ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
141
+
142
+ [[package]]
143
+ name = "libc"
144
+ version = "0.2.186"
145
+ source = "registry+https://github.com/rust-lang/crates.io-index"
146
+ checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
147
+
148
+ [[package]]
149
+ name = "libloading"
150
+ version = "0.8.9"
151
+ source = "registry+https://github.com/rust-lang/crates.io-index"
152
+ checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
153
+ dependencies = [
154
+ "cfg-if",
155
+ "windows-link",
156
+ ]
157
+
158
+ [[package]]
159
+ name = "magnus"
160
+ version = "0.7.1"
161
+ source = "registry+https://github.com/rust-lang/crates.io-index"
162
+ checksum = "3d87ae53030f3a22e83879e666cb94e58a7bdf31706878a0ba48752994146dab"
163
+ dependencies = [
164
+ "magnus-macros",
165
+ "rb-sys",
166
+ "rb-sys-env",
167
+ "seq-macro",
168
+ ]
169
+
170
+ [[package]]
171
+ name = "magnus-macros"
172
+ version = "0.6.0"
173
+ source = "registry+https://github.com/rust-lang/crates.io-index"
174
+ checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
175
+ dependencies = [
176
+ "proc-macro2",
177
+ "quote",
178
+ "syn",
179
+ ]
180
+
181
+ [[package]]
182
+ name = "memchr"
183
+ version = "2.8.2"
184
+ source = "registry+https://github.com/rust-lang/crates.io-index"
185
+ checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4"
186
+
187
+ [[package]]
188
+ name = "minimal-lexical"
189
+ version = "0.2.1"
190
+ source = "registry+https://github.com/rust-lang/crates.io-index"
191
+ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
192
+
193
+ [[package]]
194
+ name = "nom"
195
+ version = "7.1.3"
196
+ source = "registry+https://github.com/rust-lang/crates.io-index"
197
+ checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
198
+ dependencies = [
199
+ "memchr",
200
+ "minimal-lexical",
201
+ ]
202
+
203
+ [[package]]
204
+ name = "proc-macro2"
205
+ version = "1.0.106"
206
+ source = "registry+https://github.com/rust-lang/crates.io-index"
207
+ checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
208
+ dependencies = [
209
+ "unicode-ident",
210
+ ]
211
+
212
+ [[package]]
213
+ name = "quote"
214
+ version = "1.0.45"
215
+ source = "registry+https://github.com/rust-lang/crates.io-index"
216
+ checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
217
+ dependencies = [
218
+ "proc-macro2",
219
+ ]
220
+
221
+ [[package]]
222
+ name = "rb-sys"
223
+ version = "0.9.128"
224
+ source = "registry+https://github.com/rust-lang/crates.io-index"
225
+ checksum = "45ca28513560e56cfb79a62b1fce363c73af170a182024ce880c77ee9429920a"
226
+ dependencies = [
227
+ "rb-sys-build",
228
+ ]
229
+
230
+ [[package]]
231
+ name = "rb-sys-build"
232
+ version = "0.9.128"
233
+ source = "registry+https://github.com/rust-lang/crates.io-index"
234
+ checksum = "ce04b2c55eff3a21aaa623fcc655d94373238e72cac6b3e1a3641ff31649f99a"
235
+ dependencies = [
236
+ "bindgen",
237
+ "lazy_static",
238
+ "proc-macro2",
239
+ "quote",
240
+ "regex",
241
+ "shell-words",
242
+ "syn",
243
+ ]
244
+
245
+ [[package]]
246
+ name = "rb-sys-env"
247
+ version = "0.1.2"
248
+ source = "registry+https://github.com/rust-lang/crates.io-index"
249
+ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
250
+
251
+ [[package]]
252
+ name = "regex"
253
+ version = "1.12.4"
254
+ source = "registry+https://github.com/rust-lang/crates.io-index"
255
+ checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba"
256
+ dependencies = [
257
+ "aho-corasick",
258
+ "memchr",
259
+ "regex-automata",
260
+ "regex-syntax",
261
+ ]
262
+
263
+ [[package]]
264
+ name = "regex-automata"
265
+ version = "0.4.14"
266
+ source = "registry+https://github.com/rust-lang/crates.io-index"
267
+ checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
268
+ dependencies = [
269
+ "aho-corasick",
270
+ "memchr",
271
+ "regex-syntax",
272
+ ]
273
+
274
+ [[package]]
275
+ name = "regex-syntax"
276
+ version = "0.8.11"
277
+ source = "registry+https://github.com/rust-lang/crates.io-index"
278
+ checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4"
279
+
280
+ [[package]]
281
+ name = "rustc-hash"
282
+ version = "2.1.2"
283
+ source = "registry+https://github.com/rust-lang/crates.io-index"
284
+ checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
285
+
286
+ [[package]]
287
+ name = "seq-macro"
288
+ version = "0.3.6"
289
+ source = "registry+https://github.com/rust-lang/crates.io-index"
290
+ checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
291
+
292
+ [[package]]
293
+ name = "serde"
294
+ version = "1.0.228"
295
+ source = "registry+https://github.com/rust-lang/crates.io-index"
296
+ checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
297
+ dependencies = [
298
+ "serde_core",
299
+ ]
300
+
301
+ [[package]]
302
+ name = "serde_core"
303
+ version = "1.0.228"
304
+ source = "registry+https://github.com/rust-lang/crates.io-index"
305
+ checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
306
+ dependencies = [
307
+ "serde_derive",
308
+ ]
309
+
310
+ [[package]]
311
+ name = "serde_derive"
312
+ version = "1.0.228"
313
+ source = "registry+https://github.com/rust-lang/crates.io-index"
314
+ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
315
+ dependencies = [
316
+ "proc-macro2",
317
+ "quote",
318
+ "syn",
319
+ ]
320
+
321
+ [[package]]
322
+ name = "serde_json"
323
+ version = "1.0.150"
324
+ source = "registry+https://github.com/rust-lang/crates.io-index"
325
+ checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9"
326
+ dependencies = [
327
+ "itoa",
328
+ "memchr",
329
+ "serde",
330
+ "serde_core",
331
+ "zmij",
332
+ ]
333
+
334
+ [[package]]
335
+ name = "shell-words"
336
+ version = "1.1.1"
337
+ source = "registry+https://github.com/rust-lang/crates.io-index"
338
+ checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77"
339
+
340
+ [[package]]
341
+ name = "shlex"
342
+ version = "1.3.0"
343
+ source = "registry+https://github.com/rust-lang/crates.io-index"
344
+ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
345
+
346
+ [[package]]
347
+ name = "syn"
348
+ version = "2.0.118"
349
+ source = "registry+https://github.com/rust-lang/crates.io-index"
350
+ checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422"
351
+ dependencies = [
352
+ "proc-macro2",
353
+ "quote",
354
+ "unicode-ident",
355
+ ]
356
+
357
+ [[package]]
358
+ name = "unicode-ident"
359
+ version = "1.0.24"
360
+ source = "registry+https://github.com/rust-lang/crates.io-index"
361
+ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
362
+
363
+ [[package]]
364
+ name = "windows-link"
365
+ version = "0.2.1"
366
+ source = "registry+https://github.com/rust-lang/crates.io-index"
367
+ checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
368
+
369
+ [[package]]
370
+ name = "zmij"
371
+ version = "1.0.21"
372
+ source = "registry+https://github.com/rust-lang/crates.io-index"
373
+ checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
data/Cargo.toml ADDED
@@ -0,0 +1,6 @@
1
+ # Workspace root for the gem's native extension. rb-sys/rake-compiler runs
2
+ # `cargo metadata` from here to discover the `blusher` crate in ext/blusher,
3
+ # and a committed Cargo.lock makes install-time builds reproducible.
4
+ [workspace]
5
+ members = ["ext/blusher"]
6
+ resolver = "2"
data/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 momiji-rs
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ Lexer rule tables under lib/blusher/tables/ are derived from rouge
16
+ (https://github.com/rouge-ruby/rouge), MIT-licensed, © Jeanine Adkisson and
17
+ contributors.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,98 @@
1
+ # blusher
2
+
3
+ A **faster, drop-in, byte-for-byte-compatible** alternative backend for Ruby's
4
+ [rouge](https://github.com/rouge-ruby/rouge), powered by the Rust
5
+ [`carmine`](https://crates.io/crates/carmine) engine.
6
+
7
+ `require "blusher"` routes rouge's lexing — and, for the common case, its HTML
8
+ formatting — through carmine, which executes rule tables extracted from rouge's
9
+ own lexers. carmine either produces **byte-identical** output or **declines**,
10
+ in which case blusher falls back to rouge unchanged. Zero code change, zero
11
+ divergence; **~1.7× faster** highlighting to HTML on a mixed corpus (more on
12
+ large files), parity elsewhere.
13
+
14
+ ```ruby
15
+ require "rouge"
16
+ require "blusher" # ← that's it
17
+
18
+ # The hot path: lexing + HTML formatting fused in Rust, one String returned.
19
+ html = Rouge.highlight(File.read("data.json"), "json", "html")
20
+ ```
21
+
22
+ ## Performance
23
+
24
+ **For the HTML-highlighting path — what rouge is overwhelmingly used for —
25
+ blusher is ~1.7× faster on a mixed real corpus and 2.5–2.7× on individual
26
+ files**, with byte-identical output. Measured over rouge's own 126 routable
27
+ visual sample files (690 KiB) rendered to HTML with `Rouge::Formatters::HTML`:
28
+
29
+ ```
30
+ ms/pass MB/s
31
+ rouge 187.1 3.8
32
+ blusher 110.2 6.4 → 1.70×
33
+ ```
34
+
35
+ The trick is **not** faster lexing in isolation. A Ruby lexer's cost is
36
+ dominated by allocating Ruby objects at the boundary — one String + one Array
37
+ per token — which both engines pay identically, so `lex` alone is only ~1.0×
38
+ even though carmine's core is ~4.6× faster Rust-to-Rust. But a token stream is
39
+ just an *intermediate*: the real output is an HTML string. So blusher **fuses
40
+ lexing and HTML formatting in Rust and returns one String**, crossing the Ruby
41
+ boundary once (O(1)) instead of once per token (O(n)). That is where carmine's
42
+ speed finally shows up end-to-end. (Two supporting wins: a thread-local cache of
43
+ the parsed/compiled table so the ~0.5 ms build doesn't repeat per call, and
44
+ resolving CSS class names in Rust.)
45
+
46
+ Scope and honesty:
47
+
48
+ - The fast path applies when the whole pipeline is `format(lex(src))` with an
49
+ **unadorned `Rouge::Formatters::HTML`** (the `Rouge.highlight` / Jekyll /
50
+ kramdown default). Subclasses (HTMLInline, HTMLTable, Pygments, …), the
51
+ token-streaming block form, and direct token consumers transparently fall
52
+ back to rouge — same output, no speedup.
53
+ - It applies to the **126 of rouge's 227 lexers that are callback-free** (JSON, SQL, YAML, CSS,
54
+ many config/markup formats). The other 101 use rouge `proc` rules carmine
55
+ can't execute; blusher detects this up front and runs rouge for them (parity,
56
+ no wasted work). So a JSON/SQL/config-heavy workload wins big; a
57
+ Ruby/Python-heavy one trends toward parity.
58
+ - carmine's raw 4.6× is fully realized only Rust-to-Rust (e.g. embedded in
59
+ [rubyrs](https://github.com/linyiru/rubyrs), no Ruby boundary at all).
60
+
61
+ ## How it works
62
+
63
+ - `require "blusher"` aliases the original `Rouge::RegexLexer#lex` to
64
+ `__blusher_rouge_lex` and replaces it. For a routable lexer (table exists and
65
+ is callback-free), `lex` without a block returns a deferred
66
+ `Blusher::Shim::TokenStream` holding `(lexer, source)` — nothing is lexed yet.
67
+ - The patched `Rouge::Formatters::HTML#format` recognises that stream and calls
68
+ the **fused** `Blusher::Engine.format_html`, which lexes *and* formats in Rust
69
+ and returns one HTML String. Any other consumer (a different formatter, the
70
+ block form, `.to_a`) just iterates the stream, which lexes via carmine on
71
+ demand and yields the same `[Token, value]` pairs rouge would.
72
+ - carmine **declines** anything it can't reproduce identically (callback rules,
73
+ recursion, …) and blusher falls back to rouge, so the output is always exactly
74
+ rouge's — verified against the full lexer spec suite.
75
+ - The native backend is the **rb-sys/magnus extension** (`blusher.{bundle,so}`).
76
+ A `carmine-ffi` + Fiddle path is kept as a dependency-light fallback (it
77
+ marshals tokens through JSON, can't fuse, and is for correctness only).
78
+
79
+ ## Correctness
80
+
81
+ Verified against rouge v5.0.0's **full lexer spec suite: 757 runs, 5130
82
+ assertions, 0 failures** (`rake spec`). The spec suite is the correctness gate —
83
+ any new divergence must be fixed in carmine or the rule forced to decline.
84
+
85
+ ## Build (dev, in the rubyrs monorepo)
86
+
87
+ ```sh
88
+ rake compile # build the magnus ext → lib/blusher.<dlext>
89
+ rake compile_ffi # (optional) build the carmine-ffi cdylib fallback → ext/
90
+ rake tables # regenerate lib/blusher/tables/<tag>.json from installed rouge
91
+ ROUGE_SRC=/path/to/rouge rake spec
92
+ ```
93
+
94
+ ## Status
95
+
96
+ Part of [momiji-rs](https://github.com/momiji-rs) — Rust-backed engines for the
97
+ Ruby ecosystem. Tables are derived from rouge (MIT, © Jeanine Adkisson and
98
+ contributors).
data/ext/blusher/Cargo.toml ADDED
@@ -0,0 +1,18 @@
1
+ # The gem-internal native extension, compiled at `gem install` time by
2
+ # rb-sys/rake-compiler (and cross-compiled in CI for precompiled platform
3
+ # gems — see .github/workflows/release.yml). Mirrors crates/blusher-ext in the
4
+ # rubyrs monorepo, but depends on the PUBLISHED carmine from crates.io so the
5
+ # standalone momiji-rs/blusher repo builds without the workspace.
6
+ [package]
7
+ name = "blusher"
8
+ version = "0.1.0"
9
+ edition = "2021"
10
+ license = "MIT"
11
+ publish = false
12
+
13
+ [lib]
14
+ crate-type = ["cdylib"]
15
+
16
+ [dependencies]
17
+ carmine = "0.3"
18
+ magnus = "0.7"
data/ext/blusher/extconf.rb ADDED
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ # rb-sys driven build: RubyGems runs this at `gem install` (rake-compiler at `rake compile`), and
4
+ # `create_rust_makefile` emits a Makefile that runs `cargo build` and installs
5
+ # the cdylib as the gem's loadable object. The argument names the output so it
6
+ # lands at `lib/blusher/blusher.<dlext>` and is required as "blusher/blusher".
7
+ require "mkmf"
8
+ require "rb_sys/mkmf"
9
+
10
+ create_rust_makefile("blusher/blusher")