inkmark 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Cargo.lock +39 -39
- data/Cargo.toml +7 -1
- data/README.md +26 -16
- data/ext/inkmark/Cargo.toml +1 -1
- data/lib/inkmark/version.rb +1 -1
- data/lib/inkmark.rb +7 -1
- metadata +4 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: aec483333178a847396130007073b25d4c6ca453bbbc5b3338d2314a4b1dc161
|
|
4
|
+
data.tar.gz: aa8fe81268c5cd018708744cec3098733410f2f057ab1e21593f3ca2d5d7a5d8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 21233618a533d15f8529ed9e7739e34d81344d3c5439c25648e0ecf69173b4c79a23cd5a6f1548e5a7a488d46c968311e2d79058c12b69b98b3f45130a241e9e
|
|
7
|
+
data.tar.gz: 5cc63dd2c016d6446e35123d06f6e696816217ea0c6a9204d4eefecc846de46439ce16fdb3832e021441922bf467dc009e883df48ade1a3be61233420ab471d2
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
## [0.1.2] - 2026-06-21
|
|
2
|
+
|
|
3
|
+
- Fix `Inkmark.truncate_markdown` raising `TypeError` when called without explicit `options:`.
|
|
4
|
+
- Update dependencies on the Rust side.
|
|
5
|
+
|
|
6
|
+
## [0.1.1] - 2026-04-22
|
|
7
|
+
|
|
8
|
+
- Strip DWARF debug info from shipped Linux and Windows binaries via `strip = "debuginfo"`.
|
|
9
|
+
|
|
1
10
|
## [0.1.0] - 2026-04-22
|
|
2
11
|
|
|
3
12
|
- Initial public release
|
data/Cargo.lock
CHANGED
|
@@ -67,9 +67,9 @@ checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
|
|
|
67
67
|
|
|
68
68
|
[[package]]
|
|
69
69
|
name = "bitflags"
|
|
70
|
-
version = "2.
|
|
70
|
+
version = "2.13.0"
|
|
71
71
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
72
|
-
checksum = "
|
|
72
|
+
checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8"
|
|
73
73
|
|
|
74
74
|
[[package]]
|
|
75
75
|
name = "bstr"
|
|
@@ -124,9 +124,9 @@ checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04"
|
|
|
124
124
|
|
|
125
125
|
[[package]]
|
|
126
126
|
name = "displaydoc"
|
|
127
|
-
version = "0.2.
|
|
127
|
+
version = "0.2.6"
|
|
128
128
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
129
|
-
checksum = "
|
|
129
|
+
checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f"
|
|
130
130
|
dependencies = [
|
|
131
131
|
"proc-macro2",
|
|
132
132
|
"quote",
|
|
@@ -135,15 +135,15 @@ dependencies = [
|
|
|
135
135
|
|
|
136
136
|
[[package]]
|
|
137
137
|
name = "either"
|
|
138
|
-
version = "1.
|
|
138
|
+
version = "1.16.0"
|
|
139
139
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
140
|
-
checksum = "
|
|
140
|
+
checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
|
|
141
141
|
|
|
142
142
|
[[package]]
|
|
143
143
|
name = "emojis"
|
|
144
|
-
version = "0.8.
|
|
144
|
+
version = "0.8.2"
|
|
145
145
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
146
|
-
checksum = "
|
|
146
|
+
checksum = "0a4d5d50b0b58df5173d8ff1192b4d1422ceae5d981b30d4b6f8ed1d673a2bc4"
|
|
147
147
|
dependencies = [
|
|
148
148
|
"phf",
|
|
149
149
|
]
|
|
@@ -321,9 +321,9 @@ dependencies = [
|
|
|
321
321
|
|
|
322
322
|
[[package]]
|
|
323
323
|
name = "idna_adapter"
|
|
324
|
-
version = "1.2.
|
|
324
|
+
version = "1.2.2"
|
|
325
325
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
326
|
-
checksum = "
|
|
326
|
+
checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714"
|
|
327
327
|
dependencies = [
|
|
328
328
|
"icu_normalizer",
|
|
329
329
|
"icu_properties",
|
|
@@ -331,7 +331,7 @@ dependencies = [
|
|
|
331
331
|
|
|
332
332
|
[[package]]
|
|
333
333
|
name = "inkmark"
|
|
334
|
-
version = "0.1.
|
|
334
|
+
version = "0.1.2"
|
|
335
335
|
dependencies = [
|
|
336
336
|
"deunicode",
|
|
337
337
|
"emojis",
|
|
@@ -367,9 +367,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
|
|
367
367
|
|
|
368
368
|
[[package]]
|
|
369
369
|
name = "libc"
|
|
370
|
-
version = "0.2.
|
|
370
|
+
version = "0.2.186"
|
|
371
371
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
372
|
-
checksum = "
|
|
372
|
+
checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
|
|
373
373
|
|
|
374
374
|
[[package]]
|
|
375
375
|
name = "libloading"
|
|
@@ -398,9 +398,9 @@ checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0"
|
|
|
398
398
|
|
|
399
399
|
[[package]]
|
|
400
400
|
name = "log"
|
|
401
|
-
version = "0.4.
|
|
401
|
+
version = "0.4.33"
|
|
402
402
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
403
|
-
checksum = "
|
|
403
|
+
checksum = "0ceec5bc11778974d1bcb055b18002eba7f4b3518b6a0081b3af5f21666da9ad"
|
|
404
404
|
|
|
405
405
|
[[package]]
|
|
406
406
|
name = "magnus"
|
|
@@ -427,9 +427,9 @@ dependencies = [
|
|
|
427
427
|
|
|
428
428
|
[[package]]
|
|
429
429
|
name = "memchr"
|
|
430
|
-
version = "2.8.
|
|
430
|
+
version = "2.8.2"
|
|
431
431
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
432
|
-
checksum = "
|
|
432
|
+
checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4"
|
|
433
433
|
|
|
434
434
|
[[package]]
|
|
435
435
|
name = "minimal-lexical"
|
|
@@ -507,9 +507,9 @@ dependencies = [
|
|
|
507
507
|
|
|
508
508
|
[[package]]
|
|
509
509
|
name = "pulldown-cmark"
|
|
510
|
-
version = "0.13.
|
|
510
|
+
version = "0.13.4"
|
|
511
511
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
512
|
-
checksum = "
|
|
512
|
+
checksum = "e9f068eba8e7071c5f9511831b44f32c740d5adf574e990f946ddb53db2f314e"
|
|
513
513
|
dependencies = [
|
|
514
514
|
"bitflags",
|
|
515
515
|
"memchr",
|
|
@@ -543,18 +543,18 @@ dependencies = [
|
|
|
543
543
|
|
|
544
544
|
[[package]]
|
|
545
545
|
name = "rb-sys"
|
|
546
|
-
version = "0.9.
|
|
546
|
+
version = "0.9.128"
|
|
547
547
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
548
|
-
checksum = "
|
|
548
|
+
checksum = "45ca28513560e56cfb79a62b1fce363c73af170a182024ce880c77ee9429920a"
|
|
549
549
|
dependencies = [
|
|
550
550
|
"rb-sys-build",
|
|
551
551
|
]
|
|
552
552
|
|
|
553
553
|
[[package]]
|
|
554
554
|
name = "rb-sys-build"
|
|
555
|
-
version = "0.9.
|
|
555
|
+
version = "0.9.128"
|
|
556
556
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
557
|
-
checksum = "
|
|
557
|
+
checksum = "ce04b2c55eff3a21aaa623fcc655d94373238e72cac6b3e1a3641ff31649f99a"
|
|
558
558
|
dependencies = [
|
|
559
559
|
"bindgen",
|
|
560
560
|
"lazy_static",
|
|
@@ -594,9 +594,9 @@ dependencies = [
|
|
|
594
594
|
|
|
595
595
|
[[package]]
|
|
596
596
|
name = "regex"
|
|
597
|
-
version = "1.12.
|
|
597
|
+
version = "1.12.4"
|
|
598
598
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
599
|
-
checksum = "
|
|
599
|
+
checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba"
|
|
600
600
|
dependencies = [
|
|
601
601
|
"aho-corasick",
|
|
602
602
|
"memchr",
|
|
@@ -617,9 +617,9 @@ dependencies = [
|
|
|
617
617
|
|
|
618
618
|
[[package]]
|
|
619
619
|
name = "regex-syntax"
|
|
620
|
-
version = "0.8.
|
|
620
|
+
version = "0.8.11"
|
|
621
621
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
622
|
-
checksum = "
|
|
622
|
+
checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4"
|
|
623
623
|
|
|
624
624
|
[[package]]
|
|
625
625
|
name = "rustc-hash"
|
|
@@ -691,15 +691,15 @@ checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
|
|
|
691
691
|
|
|
692
692
|
[[package]]
|
|
693
693
|
name = "siphasher"
|
|
694
|
-
version = "1.0.
|
|
694
|
+
version = "1.0.3"
|
|
695
695
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
696
|
-
checksum = "
|
|
696
|
+
checksum = "8ee5873ec9cce0195efcb7a4e9507a04cd49aec9c83d0389df45b1ef7ba2e649"
|
|
697
697
|
|
|
698
698
|
[[package]]
|
|
699
699
|
name = "smallvec"
|
|
700
|
-
version = "1.15.
|
|
700
|
+
version = "1.15.2"
|
|
701
701
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
702
|
-
checksum = "
|
|
702
|
+
checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90"
|
|
703
703
|
|
|
704
704
|
[[package]]
|
|
705
705
|
name = "stable_deref_trait"
|
|
@@ -709,9 +709,9 @@ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
|
|
|
709
709
|
|
|
710
710
|
[[package]]
|
|
711
711
|
name = "syn"
|
|
712
|
-
version = "2.0.
|
|
712
|
+
version = "2.0.118"
|
|
713
713
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
714
|
-
checksum = "
|
|
714
|
+
checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422"
|
|
715
715
|
dependencies = [
|
|
716
716
|
"proc-macro2",
|
|
717
717
|
"quote",
|
|
@@ -791,9 +791,9 @@ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
|
|
791
791
|
|
|
792
792
|
[[package]]
|
|
793
793
|
name = "unicode-segmentation"
|
|
794
|
-
version = "1.13.
|
|
794
|
+
version = "1.13.3"
|
|
795
795
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
796
|
-
checksum = "
|
|
796
|
+
checksum = "c6f5d3c3b1bf09027a88a6bc961fc00497d651009560b5463668dc81b0fa87a8"
|
|
797
797
|
|
|
798
798
|
[[package]]
|
|
799
799
|
name = "url"
|
|
@@ -864,9 +864,9 @@ checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4"
|
|
|
864
864
|
|
|
865
865
|
[[package]]
|
|
866
866
|
name = "yoke"
|
|
867
|
-
version = "0.8.
|
|
867
|
+
version = "0.8.3"
|
|
868
868
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
869
|
-
checksum = "
|
|
869
|
+
checksum = "709fe23a0424b6a435d82152b1bd3fdfb0833487d5fa90d05d42762a9891fef5"
|
|
870
870
|
dependencies = [
|
|
871
871
|
"stable_deref_trait",
|
|
872
872
|
"yoke-derive",
|
|
@@ -887,9 +887,9 @@ dependencies = [
|
|
|
887
887
|
|
|
888
888
|
[[package]]
|
|
889
889
|
name = "zerofrom"
|
|
890
|
-
version = "0.1.
|
|
890
|
+
version = "0.1.8"
|
|
891
891
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
892
|
-
checksum = "
|
|
892
|
+
checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272"
|
|
893
893
|
dependencies = [
|
|
894
894
|
"zerofrom-derive",
|
|
895
895
|
]
|
data/Cargo.toml
CHANGED
|
@@ -17,7 +17,13 @@ opt-level = 3
|
|
|
17
17
|
lto = "fat"
|
|
18
18
|
codegen-units = 1
|
|
19
19
|
debug = true
|
|
20
|
-
|
|
20
|
+
# Keep `debug = true` for local release builds (full DWARF in target/), but
|
|
21
|
+
# strip DWARF from the shipped binaries. Without this, every cross-compiled
|
|
22
|
+
# .so/.dll carries ~30MB of DWARF per Ruby ABI — Mach-O splits DWARF into a
|
|
23
|
+
# separate .dSYM bundle so darwin gems never had this cost. "debuginfo" drops
|
|
24
|
+
# DWARF while preserving the symbol table, so panic backtraces still include
|
|
25
|
+
# function names (no file:line) and `perf`/flamegraphs still resolve symbols.
|
|
26
|
+
strip = "debuginfo"
|
|
21
27
|
incremental = false
|
|
22
28
|
|
|
23
29
|
[profile.test]
|
data/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Inkmark
|
|
2
2
|
|
|
3
|
-
A very fast, feature-packed, AI-first
|
|
3
|
+
A very fast, feature-packed, AI-first Markdown gem for Ruby.
|
|
4
4
|
|
|
5
5
|
[](https://github.com/yaroslav/inkmark/releases)
|
|
6
6
|
[](https://rubydoc.info/gems/inkmark)
|
|
@@ -12,12 +12,17 @@ A very fast, feature-packed, AI-first markdown gem for Ruby.
|
|
|
12
12
|
- **Very fast**. Up to 1.3× faster than redcarpet _(not CommonMark-conformant)_, about 3×–9× faster than other Ruby Markdown gems with native extensions. Built with Rust, based on [pulldown-cmark](https://github.com/pulldown-cmark/pulldown-cmark), uses SIMD.
|
|
13
13
|
- **No surprises**. CommonMark + GitHub Flavored Markdown conformance.
|
|
14
14
|
- **"Batteries included" approach**. Build lots of useful features, make them easy to use and as fast as possible.
|
|
15
|
-
- **Easy to use**. As simple as a one-method API. Pass options inline as a hash, set them one by one, or set default options for the entire application.
|
|
16
|
-
- **Feature-packed**. Server-side syntax highlighting with themes, frontmatter support, table of contents in Markdown and HTML, plain text export, extraction of
|
|
15
|
+
- **Easy to use**. As simple as a one-method API. Pass options inline as a hash, set them one by one, or set default options for the entire application.
|
|
16
|
+
- **Feature-packed**. Server-side syntax highlighting with themes, frontmatter support, table of contents in Markdown and HTML, plain text export, extraction of headings/links/images, statistics (character and word count, likely document language, blocks count), lazy image loading attributes, emoji shortcodes, autolinks, heading IDs with Unicode-transliterated slugs, wikilinks, footnotes, tables, task lists, smart punctuation, hard wraps, "nofollow/noopener" on external links.
|
|
17
17
|
- **AI-first**. Two chunking primitives: heading-based with breadcrumbs and per-chunk character/word counts, and sliding-window with overlap for size-bounded chunks where headings are absent or uneven. Block-aware or word-aware truncation for context-window budgeting. Markdown-to-Markdown pipeline. Plain-text extraction for embedding models. Structured extraction of headings, images, links, code blocks—each carrying byte ranges back into the source.
|
|
18
18
|
- **Security conscious**. Raw HTML denied by default. Hostname and URL-scheme allowlists for both links and images. GFM tagfilter for dangerous tags. A Rust-backed gem.
|
|
19
19
|
- **Easy extension API**. Hook any element with a Ruby block—no subclassing, no intermediate AST, no HTML post-processing. Rewrite URLs, swap code blocks for your own renderer, drop subtrees, or just walk the document for analysis. Handlers fire inside the single-pass parser, so extension costs essentially nothing beyond the render itself—and far less than regexing over output HTML.
|
|
20
20
|
|
|
21
|
+
**See the introductory post for background and motivation**:
|
|
22
|
+
|
|
23
|
+
**[Inkmark: a very fast, feature-packed, AI-first Markdown gem for Ruby
|
|
24
|
+
](https://yaroslav.io/posts/inkmark-fast-ai-first-markdown)**
|
|
25
|
+
|
|
21
26
|
## Contents
|
|
22
27
|
|
|
23
28
|
- [Installation](#installation)
|
|
@@ -50,6 +55,8 @@ gem "inkmark"
|
|
|
50
55
|
|
|
51
56
|
Ruby 3.3+ is supported.
|
|
52
57
|
|
|
58
|
+
The gem comes precompiled; a compiler toolchain is _not_ required for installation.
|
|
59
|
+
|
|
53
60
|
## Quick start
|
|
54
61
|
|
|
55
62
|
```ruby
|
|
@@ -91,7 +98,7 @@ for nested element-policy hashes). You can—and are recommended to!—override
|
|
|
91
98
|
- **`:trusted`**: `:recommended` plus raw HTML pass-through.
|
|
92
99
|
**Dangerous.** Intended only for content you fully trust: internal,
|
|
93
100
|
team-authored. With raw HTML on, Inkmark does no sanitization beyond
|
|
94
|
-
the narrow GFM tagfilter (turn it off
|
|
101
|
+
the narrow GFM tagfilter (turn it off at your own risk); the caller is
|
|
95
102
|
responsible for output safety. Do not apply this preset to anything a user can influence, directly or indirectly.
|
|
96
103
|
|
|
97
104
|
- **`:gfm`**: the bare default. CommonMark plus the core GFM extensions
|
|
@@ -129,7 +136,7 @@ Inkmark.to_html(internal_doc, options: { preset: :trusted })
|
|
|
129
136
|
## Options
|
|
130
137
|
|
|
131
138
|
GFM extensions are on by default; raw HTML rendering is off by default.
|
|
132
|
-
Pass a hash to `Inkmark.to_html` / `Inkmark.new`, or mutate
|
|
139
|
+
Pass a hash to `Inkmark.to_html` / `Inkmark.new`, or mutate an `Inkmark::Options`
|
|
133
140
|
instance via its accessors.
|
|
134
141
|
|
|
135
142
|
| Key | Default | Description |
|
|
@@ -158,7 +165,7 @@ instance via its accessors.
|
|
|
158
165
|
| `wikilinks` | `false` | `[[Page]]` and `[[Page\|label]]` render as links. |
|
|
159
166
|
| `frontmatter` | `false` | Frontmatter (YAML metadata at the start of the document). Parsed and exposed via `Inkmark#frontmatter`; the block is stripped from rendered output. |
|
|
160
167
|
|
|
161
|
-
Options can be supplied
|
|
168
|
+
Options can be supplied in several ways:
|
|
162
169
|
|
|
163
170
|
```ruby
|
|
164
171
|
# As a hash at construction
|
|
@@ -185,7 +192,7 @@ Inkmark.new("x", options: { taples: true })
|
|
|
185
192
|
|
|
186
193
|
## Raw HTML
|
|
187
194
|
|
|
188
|
-
Raw HTML is suppressed by default. This is safe-by-default for rendering untrusted
|
|
195
|
+
Raw HTML is suppressed by default. This is safe-by-default for rendering untrusted Markdown:
|
|
189
196
|
|
|
190
197
|
```ruby
|
|
191
198
|
Inkmark.to_html("<script>alert(1)</script>")
|
|
@@ -248,7 +255,7 @@ relative URLs.
|
|
|
248
255
|
|
|
249
256
|
## URL scheme filtering
|
|
250
257
|
|
|
251
|
-
For rendering untrusted
|
|
258
|
+
For rendering untrusted Markdown, opt in to scheme allowlists to block
|
|
252
259
|
`javascript:`, `data:`, and other dangerous URL schemes in links and
|
|
253
260
|
images:
|
|
254
261
|
|
|
@@ -274,7 +281,7 @@ Inkmark.to_html(">)",
|
|
|
274
281
|
# => "<p>pic</p>\n" # dropped to alt text
|
|
275
282
|
```
|
|
276
283
|
|
|
277
|
-
**Scope:** scheme filtering applies to
|
|
284
|
+
**Scope:** scheme filtering applies to Markdown-emitted links and images
|
|
278
285
|
(`[text](url)` / ``). Raw HTML `<a href>` / `<img src>` inside
|
|
279
286
|
`raw_html: true` content is *not* filtered—for that case use a
|
|
280
287
|
downstream HTML sanitizer like Loofah.
|
|
@@ -394,7 +401,7 @@ parent. Root-level sections and the preamble have an empty array. Skipped
|
|
|
394
401
|
levels are omitted, so an `###` directly under an `#` has `breadcrumb:
|
|
395
402
|
["Top"]`, not `["Top", nil]`. RAG pipelines typically prepend the
|
|
396
403
|
breadcrumb to each chunk before embedding—it gives the vector model a
|
|
397
|
-
cheap signal about the chunk's place in the document
|
|
404
|
+
cheap signal about the chunk's place in the document.
|
|
398
405
|
|
|
399
406
|
Enable `statistics: true` to add `:character_count` and `:word_count` to
|
|
400
407
|
every section entry. Counts reflect the section's filter-applied text
|
|
@@ -460,7 +467,7 @@ embedding quality for downstream semantic search.
|
|
|
460
467
|
sections = Inkmark.chunks_by_heading(doc, options: {
|
|
461
468
|
emoji_shortcodes: true, # keep—improves semantic signal
|
|
462
469
|
links: {
|
|
463
|
-
autolink: true, # keep—proper anchor
|
|
470
|
+
autolink: true, # keep—proper anchor Markdown
|
|
464
471
|
allowed_schemes: %w[http https mailto], # keep—safe URLs
|
|
465
472
|
nofollow: false # off—would embed <a rel=...> HTML
|
|
466
473
|
},
|
|
@@ -907,7 +914,10 @@ Use `parent_kind` and `ancestor_kinds` for context-sensitive decisions:
|
|
|
907
914
|
md.on(:image) { |img| img.delete if img.ancestor_kinds.include?(:link) }
|
|
908
915
|
|
|
909
916
|
# Only process top-level paragraphs
|
|
910
|
-
md.on(:paragraph)
|
|
917
|
+
md.on(:paragraph) do |p|
|
|
918
|
+
next unless p.parent_kind.nil?
|
|
919
|
+
# ... only top-level paragraphs reach here
|
|
920
|
+
end
|
|
911
921
|
```
|
|
912
922
|
|
|
913
923
|
`depth` gives the nesting level (0 = top-level block):
|
|
@@ -927,7 +937,7 @@ original source to recover the raw Markdown for any element:
|
|
|
927
937
|
source = File.read("post.md")
|
|
928
938
|
md = Inkmark.new(source)
|
|
929
939
|
md.on(:heading) do |h|
|
|
930
|
-
puts "#{h.byte_range}: #{source
|
|
940
|
+
puts "#{h.byte_range}: #{source.byteslice(h.byte_range).inspect}"
|
|
931
941
|
end
|
|
932
942
|
md.walk
|
|
933
943
|
```
|
|
@@ -939,7 +949,7 @@ and `:hard_break`. Also `nil` for `:link` when `links: { autolink: true }` is en
|
|
|
939
949
|
|
|
940
950
|
### Event object reference
|
|
941
951
|
|
|
942
|
-
Every handler receives
|
|
952
|
+
Every handler receives an `Inkmark::Event` with these fields and methods:
|
|
943
953
|
|
|
944
954
|
| Field / method | Type | Description |
|
|
945
955
|
|---|---|---|
|
|
@@ -972,7 +982,7 @@ Every handler receives a `Inkmark::Event` with these fields and methods:
|
|
|
972
982
|
| `:list` |—| `html=`, `markdown=` |
|
|
973
983
|
| `:ordered_list` |—| `html=`, `markdown=` |
|
|
974
984
|
| `:list_item` | `text` | `html=`, `markdown=` |
|
|
975
|
-
| `:code_block` | `text
|
|
985
|
+
| `:code_block` | `text` (alias `source`), `lang` | `html=`, `markdown=` |
|
|
976
986
|
| `:table` |—| `html=`, `markdown=` |
|
|
977
987
|
| `:table_head` |—| `html=`, `markdown=` |
|
|
978
988
|
| `:table_row` | `text` | `html=`, `markdown=` |
|
|
@@ -1023,7 +1033,7 @@ Post-render filters (`syntax_highlight`, allowlists, `images: { lazy: true }`,
|
|
|
1023
1033
|
|
|
1024
1034
|
Inkmark ships a benchmark harness comparing it against `kramdown`,
|
|
1025
1035
|
`commonmarker`, `redcarpet`, `markly`, and `rdiscount` on a sweep of real
|
|
1026
|
-
|
|
1036
|
+
Markdown inputs.
|
|
1027
1037
|
|
|
1028
1038
|
Measuring apples to apples: every adapter is tuned for **feature parity** with
|
|
1029
1039
|
Inkmark's defaults—CommonMark + core GFM (tables, strikethrough, tasklists,
|
data/ext/inkmark/Cargo.toml
CHANGED
data/lib/inkmark/version.rb
CHANGED
data/lib/inkmark.rb
CHANGED
|
@@ -222,7 +222,13 @@ class Inkmark
|
|
|
222
222
|
params = normalize_truncate_params(
|
|
223
223
|
chars: chars, words: words, at: at, marker: marker
|
|
224
224
|
)
|
|
225
|
-
|
|
225
|
+
# truncate's native binding requires an options Hash; unlike the
|
|
226
|
+
# to_html/to_plain_text bindings it has no nil fast path, so fall
|
|
227
|
+
# back to the default options hash when the resolver returns nil.
|
|
228
|
+
_native_truncate_markdown(
|
|
229
|
+
source, params,
|
|
230
|
+
resolve_frozen_options(options) || default_options.to_native_hash_frozen
|
|
231
|
+
)
|
|
226
232
|
end
|
|
227
233
|
|
|
228
234
|
# Render +source+ through the filter pipeline and serialize to plain
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: inkmark
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Yaroslav Markin
|
|
@@ -135,7 +135,7 @@ dependencies:
|
|
|
135
135
|
- - ">="
|
|
136
136
|
- !ruby/object:Gem::Version
|
|
137
137
|
version: '0'
|
|
138
|
-
description: A very fast, feature-packed, AI-first
|
|
138
|
+
description: A very fast, feature-packed, AI-first Markdown (CommonMark/GFM) gem for
|
|
139
139
|
Ruby, based on pulldown-cmark (Rust).
|
|
140
140
|
email:
|
|
141
141
|
- yaroslav@markin.net
|
|
@@ -202,7 +202,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
202
202
|
- !ruby/object:Gem::Version
|
|
203
203
|
version: '0'
|
|
204
204
|
requirements: []
|
|
205
|
-
rubygems_version: 4.0.
|
|
205
|
+
rubygems_version: 4.0.10
|
|
206
206
|
specification_version: 4
|
|
207
|
-
summary: Very fast, feature-packed, AI-first
|
|
207
|
+
summary: Very fast, feature-packed, AI-first Markdown gem for Ruby.
|
|
208
208
|
test_files: []
|