makiri 0.1.0-aarch64-linux
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/ci.yml +73 -0
- data/.github/workflows/conformance.yml +94 -0
- data/.github/workflows/release.yml +223 -0
- data/.github/workflows/security.yml +95 -0
- data/.gitmodules +3 -0
- data/CHANGELOG.md +102 -0
- data/LICENSE +176 -0
- data/NOTICE +12 -0
- data/README.md +134 -0
- data/Rakefile +150 -0
- data/lib/makiri/3.2/makiri.so +0 -0
- data/lib/makiri/3.3/makiri.so +0 -0
- data/lib/makiri/3.4/makiri.so +0 -0
- data/lib/makiri/4.0/makiri.so +0 -0
- data/lib/makiri/attribute.rb +13 -0
- data/lib/makiri/cdata.rb +6 -0
- data/lib/makiri/comment.rb +6 -0
- data/lib/makiri/css.rb +11 -0
- data/lib/makiri/document.rb +82 -0
- data/lib/makiri/document_fragment.rb +21 -0
- data/lib/makiri/document_type.rb +14 -0
- data/lib/makiri/element.rb +17 -0
- data/lib/makiri/node.rb +221 -0
- data/lib/makiri/node_set.rb +105 -0
- data/lib/makiri/processing_instruction.rb +8 -0
- data/lib/makiri/text.rb +16 -0
- data/lib/makiri/version.rb +5 -0
- data/lib/makiri/xpath.rb +14 -0
- data/lib/makiri/xpath_context.rb +41 -0
- data/lib/makiri.rb +47 -0
- data/script/build_native_gem.rb +50 -0
- data/script/check_c_safety.rb +238 -0
- data/script/check_c_safety_allowlist.yml +12 -0
- data/sig/makiri.rbs +4 -0
- metadata +127 -0
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "optparse"
|
|
5
|
+
require "pathname"
|
|
6
|
+
require "yaml"
|
|
7
|
+
|
|
8
|
+
# Repository root: the parent of the directory that contains this script.
ROOT = Pathname.new(__dir__).join("..").expand_path
# YAML file that provides the `ignore_paths` and `allowlist` sections.
ALLOWLIST_PATH = ROOT.join("script/check_c_safety_allowlist.yml")

# A lint rule: a stable id (referenced from the allowlist file), the message
# printed for each violation, and the regex tested against one source line.
Rule = Struct.new(:id, :message, :regex, keyword_init: true)
# One rule hit: repo-relative path, 1-based line number, rule id, and the
# stripped text of the offending line.
Finding = Struct.new(:path, :line, :rule, :text, keyword_init: true)

# Every check the lint performs. Each regex is matched against a single
# physical source line, so a construct that spans several lines is only caught
# when the distinctive part sits on one line.
RULES = [
  Rule.new(
    id: "string_value_cstr",
    message: "StringValueCStr bypasses explicit ptr/len handling",
    regex: /\bStringValueCStr\s*\(/
  ),
  Rule.new(
    id: "direct_alloc",
    message: "direct (x)malloc/calloc/realloc/strdup / ALLOC_N must go through safe helpers",
    regex: /\b(?:x?(?:malloc|calloc|realloc|strdup)|ALLOC_N|REALLOC_N)\s*\(/
  ),
  Rule.new(
    id: "direct_strlen",
    message: "strlen requires an explicitly no-NUL checked C string",
    regex: /\bstrlen\s*\(/
  ),
  Rule.new(
    id: "alloca",
    message: "ALLOCA_N uses stack space based on runtime input",
    regex: /\bALLOCA_N\s*\(/
  ),
  Rule.new(
    id: "ruby_string_ptr",
    message: "RSTRING_PTR/RSTRING_LEN must be isolated in checked Ruby input helpers",
    regex: /\bRSTRING_(?:PTR|LEN)\s*\(/
  ),
  Rule.new(
    id: "allocation_plus_one",
    message: "allocation sizes using + 1 need overflow-checked helpers",
    # `x?` covers the whole x-prefixed family (xmalloc/xcalloc/xrealloc), not
    # only xrealloc: `\bmalloc` alone cannot match inside `xmalloc` because
    # there is no word boundary between the `x` and the `m`.
    regex: /\bx?(?:malloc|calloc|realloc)\s*\([^;\n]*\+\s*1\b/
  ),
  Rule.new(
    id: "sizeof_allocation",
    message: "count * sizeof(...) must be overflow-checked before allocation/copy sizing",
    regex: /\*\s*sizeof\s*\(/
  ),
  Rule.new(
    id: "cap_times_two",
    message: "capacity doubling must use an overflow-checked grow helper",
    # `=?` also catches the compound form `cap *= 2`, which is usually on its
    # own line inside a multi-line loop and therefore invisible to the
    # single-line while_cap_double rule below.
    regex: /(?:\b\w*cap\w*\s*\*=?\s*2\b|\b2\s*\*\s*\w*cap\w*\b)/
  ),
  Rule.new(
    id: "while_cap_double",
    message: "looped cap *= 2 growth must use an overflow-checked grow helper",
    regex: /while\s*\([^)]*>\s*[^)]*cap[^)]*\).*?\*=\s*2/
  ),
  Rule.new(
    id: "verified_text_forge",
    message: "mkr_verified_text_t must be minted only by mkr_verified_text_from_view (the validated boundary)",
    regex: /\(\s*mkr_verified_text_t\s*\)\s*\{/
  ),
].freeze
|
|
66
|
+
|
|
67
|
+
# Read and parse the allowlist YAML file at ALLOWLIST_PATH.
#
# Parsing uses YAML.safe_load with no extra permitted classes and aliases
# disabled. A document that parses to nil/false (e.g. an empty file) yields an
# empty Hash instead.
def load_config
  yaml_text = ALLOWLIST_PATH.read
  parsed = YAML.safe_load(yaml_text, permitted_classes: [], aliases: false)
  parsed || {}
end
|
|
70
|
+
|
|
71
|
+
# Validate and return the `ignore_paths` section of the parsed config.
#
# Each entry is a mapping that carries a `path` glob, a `reason`, and an
# OPTIONAL `rule`. Without `rule` the whole file is exempt from every check
# (e.g. the core/ primitives layer). With `rule` only that one check is exempt
# in the matching files (e.g. ruby_string_ptr inside the bridge/ boundary), so
# the same pattern anywhere else still trips the lint.
#
# raw - the Hash produced by load_config.
#
# Returns the entries Array unchanged ([] when the key is absent). Aborts the
# process (SystemExit, message on stderr) when the section is not an array, an
# entry is not a mapping, `path`/`reason` is missing or empty, or `rule` names
# an id that is not in RULES.
def load_ignore_paths(raw)
  entries = raw.fetch("ignore_paths", [])
  abort "invalid allowlist: top-level 'ignore_paths' must be an array" unless entries.is_a?(Array)

  entries.each_with_index do |entry, idx|
    label = "invalid ignore_paths entry ##{idx + 1}"
    # Without this guard a nil/scalar entry either crashes on entry[key] or is
    # indexed as a String, producing a misleading "missing path" message.
    abort "#{label}: must be a mapping" unless entry.is_a?(Hash)

    %w[path reason].each do |key|
      value = entry[key]
      abort "#{label}: missing #{key}" if value.nil? || (value.respond_to?(:empty?) && value.empty?)
    end

    rule = entry["rule"]
    abort "#{label}: unknown rule '#{rule}'" if rule && RULES.none? { |r| r.id == rule }
  end
  entries
end
|
|
96
|
+
|
|
97
|
+
# Test a repo-relative path against an allowlist glob.
#
# pattern - glob String from the config (e.g. "ext/makiri/core/**").
# path    - repo-relative file path String.
#
# Matching uses File.fnmatch? with FNM_PATHNAME, tried both without and with
# FNM_EXTGLOB (brace expansion). Under FNM_PATHNAME a `**` that is not followed
# by `/` behaves like a plain `*`, so a trailing "/**" alone would only cover
# files directly inside the directory. To make "dir/**" cover the whole
# subtree, such a pattern is additionally tried as "dir/**/*".
#
# Returns true when any variant matches, false otherwise.
def path_matches?(pattern, path)
  globs = [pattern]
  globs << "#{pattern}/*" if pattern.end_with?("/**")

  globs.any? do |glob|
    File.fnmatch?(glob, path, File::FNM_PATHNAME) ||
      File.fnmatch?(glob, path, File::FNM_PATHNAME | File::FNM_EXTGLOB)
  end
end
|
|
101
|
+
|
|
102
|
+
# True when some ignore entry WITHOUT a `rule` key matches the path, meaning
# the file is excluded from scanning altogether.
def fully_ignored?(path, ignores)
  whole_file_entries = ignores.select { |entry| entry["rule"].nil? }
  whole_file_entries.any? { |entry| path_matches?(entry["path"], path) }
end
|
|
106
|
+
|
|
107
|
+
# True when an ignore entry scoped to exactly this rule id matches the path,
# so only that rule's findings are dropped for the file.
def rule_ignored?(path, rule_id, ignores)
  ignores.each do |entry|
    next unless entry["rule"] == rule_id
    return true if path_matches?(entry["path"], path)
  end
  false
end
|
|
111
|
+
|
|
112
|
+
# List the C sources and headers under ext/makiri (recursively), sorted, minus
# every file that a whole-file ignore entry excludes.
#
# ignores - validated ignore_paths entries.
#
# Returns an Array of path Strings as produced by Dir.glob on the ROOT-based
# pattern.
def target_files(ignores)
  pattern = ROOT.join("ext/makiri/**/*.{c,h}").to_s
  Dir.glob(pattern).sort.select do |file|
    relative = Pathname.new(file).relative_path_from(ROOT).to_s
    !fully_ignored?(relative, ignores)
  end
end
|
|
118
|
+
|
|
119
|
+
# Decide whether a source line should be linted.
#
# line - one raw line of a C source/header file.
#
# Blank lines and lines that begin a `//` or `/*` comment are skipped. A line
# whose first character is `*` is skipped only when it looks like block-comment
# decoration: a lone `*`, `*/`, or `*` followed by whitespace. A leading `*`
# followed directly by anything else (e.g. `*out = malloc(n + 1);`,
# `**slot = ...`) is a pointer dereference and is linted as code.
#
# Known limitation of this per-line heuristic: a wrapped expression line that
# starts with `* ` (multiplication continuation) is still treated as a comment.
#
# Returns true for code lines, false otherwise.
def code_line?(line)
  stripped = line.strip
  return false if stripped.empty?
  return false if stripped.start_with?("//", "/*")
  return false if stripped.match?(%r{\A\*(?:\s|/|\z)})

  true
end
|
|
126
|
+
|
|
127
|
+
# Run every rule over every target file and collect the hits.
#
# ignores - validated ignore_paths entries; whole-file entries remove files
#           from the scan (via target_files), rule-scoped entries drop only
#           that rule's hits in matching files.
#
# Files are read as UTF-8 regardless of the process locale and each line is
# scrubbed first: matching a regex against a string with invalid bytes raises
# ArgumentError, which would otherwise abort the whole lint on a single stray
# byte (or on any non-ASCII byte when the locale encoding is US-ASCII).
#
# Returns an Array of Finding, in file order then line order then RULES order.
def scan_findings(ignores)
  target_files(ignores).flat_map do |file|
    rel = Pathname.new(file).relative_path_from(ROOT).to_s
    File.readlines(file, encoding: "UTF-8").flat_map.with_index(1) do |raw_line, lineno|
      line = raw_line.scrub
      next [] unless code_line?(line)

      RULES.filter_map do |rule|
        next unless line.match?(rule.regex)
        next if rule_ignored?(rel, rule.id, ignores)

        Finding.new(path: rel, line: lineno, rule: rule.id, text: line.strip)
      end
    end
  end
end
|
|
142
|
+
|
|
143
|
+
# Validate and return the `allowlist` section of the parsed config.
#
# raw - the Hash produced by load_config.
#
# Every entry must be a mapping with non-empty `path`, `rule`, `max` and
# `reason`, and `max` must be a positive Integer.
#
# Returns the entries Array unchanged ([] when the key is absent). Aborts the
# process (SystemExit, message on stderr) on the first invalid entry.
def load_allowlist(raw)
  entries = raw.fetch("allowlist", [])
  abort "invalid allowlist: top-level 'allowlist' must be an array" unless entries.is_a?(Array)

  entries.each_with_index do |entry, idx|
    label = "invalid allowlist entry ##{idx + 1}"
    # Without this guard a nil/scalar entry either crashes on entry[key] or is
    # indexed as a String, producing a misleading "missing path" message.
    abort "#{label}: must be a mapping" unless entry.is_a?(Hash)

    %w[path rule max reason].each do |key|
      value = entry[key]
      abort "#{label}: missing #{key}" if value.nil? || (value.respond_to?(:empty?) && value.empty?)
    end

    max = entry["max"]
    abort "#{label}: max must be a positive integer" unless max.is_a?(Integer) && max.positive?
  end
  entries
end
|
|
162
|
+
|
|
163
|
+
# Build the per-(path, rule) budget of tolerated findings.
#
# entries - validated allowlist entries.
#
# Returns a Hash (default value 0) keyed by [path, rule]; entries that share a
# key have their `max` values added together.
def allowed_counts(entries)
  budgets = Hash.new(0)
  entries.each do |entry|
    budgets[[entry["path"], entry["rule"]]] += entry["max"]
  end
  budgets
end
|
|
169
|
+
|
|
170
|
+
# Key under which a finding is counted against the allowlist: the pair of its
# path and its rule id.
def finding_key(finding)
  path = finding.path
  rule = finding.rule
  [path, rule]
end
|
|
173
|
+
|
|
174
|
+
# Print a ready-to-save allowlist YAML document for the given findings.
#
# findings - Array of objects responding to #path and #rule.
#
# The output starts with a fixed whole-file ignore for ext/makiri/core/** and
# then lists one allowlist entry per (path, rule) pair, sorted, with `max` set
# to the number of findings for that pair. Keys of each entry are indented to
# line up under the key that follows "- ", so the printed text is a valid YAML
# block sequence that load_config can parse back.
#
# Returns nil; writes to standard output.
def dump_baseline(findings)
  puts "ignore_paths:"
  puts "  - path: ext/makiri/core/**"
  puts "    reason: Safe allocation and buffer helper internals intentionally contain primitive allocation patterns."
  puts ""

  puts "allowlist:"
  findings.group_by { |f| [f.path, f.rule] }.sort.each do |(path, rule), group|
    puts "  - path: #{path}"
    puts "    rule: #{rule}"
    puts "    max: #{group.length}"
    puts "    reason: Baseline existing occurrence; remove as the C safety refactor replaces it."
  end
  nil
end
|
|
188
|
+
|
|
189
|
+
# ---- command-line entry point ------------------------------------------------
# Flags:
#   --dump-baseline                    print an allowlist for the current tree
#   --no-allowlist / --ignore-allowlist  report every finding, budgets ignored
options = { dump_baseline: false, no_allowlist: false }
parser = OptionParser.new
parser.on("--dump-baseline", "Print an allowlist for the current tree") do
  options[:dump_baseline] = true
end
parser.on("--no-allowlist", "--ignore-allowlist", "Report every finding without applying the allowlist") do
  options[:no_allowlist] = true
end
parser.parse!

config = load_config
ignores = load_ignore_paths(config)
findings = scan_findings(ignores)

# Baseline mode only prints; it never fails the build.
if options[:dump_baseline]
  dump_baseline(findings)
  exit 0
end

# With the allowlist disabled every budget is 0, so each finding is a violation.
allow_counts =
  if options[:no_allowlist]
    Hash.new(0)
  else
    allowed_counts(load_allowlist(config))
  end

# Walk findings in scan order; the first `max` hits per (path, rule) are
# tolerated and everything past the budget is reported.
seen = Hash.new(0)
violations = findings.select do |finding|
  key = finding_key(finding)
  seen[key] += 1
  seen[key] > allow_counts[key]
end

if violations.empty?
  puts "C safety lint passed (#{findings.length} checked finding(s), all allowlisted)."
  exit 0
end

headline =
  if options[:no_allowlist]
    "C safety lint failed: #{violations.length} finding(s) with allowlist disabled"
  else
    "C safety lint failed: #{violations.length} unallowlisted finding(s)"
  end
warn headline

violations.each do |finding|
  rule = RULES.find { |r| r.id == finding.rule }
  warn "#{finding.path}:#{finding.line}: #{finding.rule}: #{rule&.message}"
  warn " #{finding.text}"
end
warn
warn "If this is intentionally safe, add a narrow entry with a reason to #{ALLOWLIST_PATH.relative_path_from(ROOT)}."
exit 1
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
ignore_paths:
|
|
2
|
+
- path: ext/makiri/core/**
|
|
3
|
+
reason: The Ruby-free safe-core primitives layer; it deliberately holds the raw allocation/length patterns that every other layer routes through.
|
|
4
|
+
# --- the Ruby boundary (bridge/): raw Ruby String access and mkr_verified_text_t
|
|
5
|
+
# minting are this layer's defining purpose, exempt per (file, rule) so the
|
|
6
|
+
# same patterns anywhere else still trip the lint ---
|
|
7
|
+
- path: ext/makiri/bridge/ruby_string.c
|
|
8
|
+
rule: ruby_string_ptr
|
|
9
|
+
reason: "Ruby string boundary helpers only: strict text validation, raw HTML byte views, engine-string validation, and fail-closed exception-message copying. Raw RSTRING access is this file's purpose; it never escapes to the engine."
|
|
10
|
+
- path: ext/makiri/bridge/text_token.c
|
|
11
|
+
rule: verified_text_forge
|
|
12
|
+
reason: mkr_verified_text_from_view, the sole sanctioned mint of mkr_verified_text_t; its input view is already validated by the bridge string helpers.
|
data/sig/makiri.rbs
ADDED
metadata
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: makiri
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: aarch64-linux
|
|
6
|
+
authors:
|
|
7
|
+
- takahashim
|
|
8
|
+
bindir: exe
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: rake
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - "~>"
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '13.0'
|
|
19
|
+
type: :development
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - "~>"
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '13.0'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: rake-compiler
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - "~>"
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '1.2'
|
|
33
|
+
type: :development
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - "~>"
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '1.2'
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: rspec
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - "~>"
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '3.13'
|
|
47
|
+
type: :development
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - "~>"
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '3.13'
|
|
54
|
+
description: |
|
|
55
|
+
Makiri parses HTML5 documents via the Lexbor library
|
|
56
|
+
and queries them with a native XPath 1.0 engine written for this project.
|
|
57
|
+
It does not depend on libxml2 at any layer. The API is
|
|
58
|
+
Nokogiri-compatible for the subset of methods used in HTML scraping.
|
|
59
|
+
email:
|
|
60
|
+
- takahashimm@gmail.com
|
|
61
|
+
executables: []
|
|
62
|
+
extensions: []
|
|
63
|
+
extra_rdoc_files: []
|
|
64
|
+
files:
|
|
65
|
+
- ".github/workflows/ci.yml"
|
|
66
|
+
- ".github/workflows/conformance.yml"
|
|
67
|
+
- ".github/workflows/release.yml"
|
|
68
|
+
- ".github/workflows/security.yml"
|
|
69
|
+
- ".gitmodules"
|
|
70
|
+
- CHANGELOG.md
|
|
71
|
+
- LICENSE
|
|
72
|
+
- NOTICE
|
|
73
|
+
- README.md
|
|
74
|
+
- Rakefile
|
|
75
|
+
- lib/makiri.rb
|
|
76
|
+
- lib/makiri/3.2/makiri.so
|
|
77
|
+
- lib/makiri/3.3/makiri.so
|
|
78
|
+
- lib/makiri/3.4/makiri.so
|
|
79
|
+
- lib/makiri/4.0/makiri.so
|
|
80
|
+
- lib/makiri/attribute.rb
|
|
81
|
+
- lib/makiri/cdata.rb
|
|
82
|
+
- lib/makiri/comment.rb
|
|
83
|
+
- lib/makiri/css.rb
|
|
84
|
+
- lib/makiri/document.rb
|
|
85
|
+
- lib/makiri/document_fragment.rb
|
|
86
|
+
- lib/makiri/document_type.rb
|
|
87
|
+
- lib/makiri/element.rb
|
|
88
|
+
- lib/makiri/node.rb
|
|
89
|
+
- lib/makiri/node_set.rb
|
|
90
|
+
- lib/makiri/processing_instruction.rb
|
|
91
|
+
- lib/makiri/text.rb
|
|
92
|
+
- lib/makiri/version.rb
|
|
93
|
+
- lib/makiri/xpath.rb
|
|
94
|
+
- lib/makiri/xpath_context.rb
|
|
95
|
+
- script/build_native_gem.rb
|
|
96
|
+
- script/check_c_safety.rb
|
|
97
|
+
- script/check_c_safety_allowlist.yml
|
|
98
|
+
- sig/makiri.rbs
|
|
99
|
+
homepage: https://github.com/takahashim/makiri
|
|
100
|
+
licenses:
|
|
101
|
+
- Apache-2.0
|
|
102
|
+
metadata:
|
|
103
|
+
homepage_uri: https://github.com/takahashim/makiri
|
|
104
|
+
bug_tracker_uri: https://github.com/takahashim/makiri/issues
|
|
105
|
+
changelog_uri: https://github.com/takahashim/makiri/blob/main/CHANGELOG.md
|
|
106
|
+
rubygems_mfa_required: 'true'
|
|
107
|
+
rdoc_options: []
|
|
108
|
+
require_paths:
|
|
109
|
+
- lib
|
|
110
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
111
|
+
requirements:
|
|
112
|
+
- - ">="
|
|
113
|
+
- !ruby/object:Gem::Version
|
|
114
|
+
version: 3.2.0
|
|
115
|
+
- - "<"
|
|
116
|
+
- !ruby/object:Gem::Version
|
|
117
|
+
version: 4.1.dev
|
|
118
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
119
|
+
requirements:
|
|
120
|
+
- - ">="
|
|
121
|
+
- !ruby/object:Gem::Version
|
|
122
|
+
version: '0'
|
|
123
|
+
requirements: []
|
|
124
|
+
rubygems_version: 3.6.9
|
|
125
|
+
specification_version: 4
|
|
126
|
+
summary: HTML5 parser + native XPath 1.0 for Ruby, with no libxml2 dependency.
|
|
127
|
+
test_files: []
|