makiri 0.1.0-aarch64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,238 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "optparse"
5
+ require "pathname"
6
+ require "yaml"
7
+
8
# Repository root: the parent of the directory that contains this script.
ROOT = Pathname.new(File.expand_path("..", __dir__))
# YAML file that supplies the `ignore_paths` and `allowlist` sections.
ALLOWLIST_PATH = ROOT + "script/check_c_safety_allowlist.yml"
10
+
11
# A single lint check: a stable `id`, a human-readable `message`, and the
# `regex` that is tested against each scanned line.
Rule = Struct.new(:id, :message, :regex, keyword_init: true)
# One regex hit: repository-relative `path`, 1-based `line`, the matching
# rule's id in `rule`, and the stripped source line in `text`.
Finding = Struct.new(:path, :line, :rule, :text, keyword_init: true)

# Every check applied to each scanned line, in reporting order.
RULES = [
  Rule.new(
    id: "string_value_cstr",
    message: "StringValueCStr bypasses explicit ptr/len handling",
    regex: /\bStringValueCStr\s*\(/
  ),
  Rule.new(
    id: "direct_alloc",
    message: "direct (x)malloc/calloc/realloc/strdup / ALLOC_N must go through safe helpers",
    regex: /\b(?:x?(?:malloc|calloc|realloc|strdup)|ALLOC_N|REALLOC_N)\s*\(/
  ),
  Rule.new(
    id: "direct_strlen",
    message: "strlen requires an explicitly no-NUL checked C string",
    regex: /\bstrlen\s*\(/
  ),
  Rule.new(
    id: "alloca",
    message: "ALLOCA_N uses stack space based on runtime input",
    regex: /\bALLOCA_N\s*\(/
  ),
  Rule.new(
    id: "ruby_string_ptr",
    message: "RSTRING_PTR/RSTRING_LEN must be isolated in checked Ruby input helpers",
    regex: /\bRSTRING_(?:PTR|LEN)\s*\(/
  ),
  Rule.new(
    id: "allocation_plus_one",
    message: "allocation sizes using + 1 need overflow-checked helpers",
    # `x?` covers the x-prefixed allocators (xmalloc/xcalloc/xrealloc) the
    # same way direct_alloc does; a bare `\bmalloc` cannot match `xmalloc`
    # because there is no word boundary between `x` and `m`.
    regex: /\bx?(?:malloc|calloc|realloc)\s*\([^;\n]*\+\s*1\b/
  ),
  Rule.new(
    id: "sizeof_allocation",
    message: "count * sizeof(...) must be overflow-checked before allocation/copy sizing",
    regex: /\*\s*sizeof\s*\(/
  ),
  Rule.new(
    id: "cap_times_two",
    message: "capacity doubling must use an overflow-checked grow helper",
    regex: /(?:\b\w*cap\w*\s*\*\s*2\b|\b2\s*\*\s*\w*cap\w*\b)/
  ),
  Rule.new(
    id: "while_cap_double",
    message: "looped cap *= 2 growth must use an overflow-checked grow helper",
    regex: /while\s*\([^)]*>\s*[^)]*cap[^)]*\).*?\*=\s*2/
  ),
  Rule.new(
    id: "verified_text_forge",
    message: "mkr_verified_text_t must be minted only by mkr_verified_text_from_view (the validated boundary)",
    regex: /\(\s*mkr_verified_text_t\s*\)\s*\{/
  ),
].freeze
66
+
67
# Reads and parses the YAML document at ALLOWLIST_PATH.
#
# Parsing goes through YAML.safe_load with no extra permitted classes and
# with aliases disabled. An empty (or falsy) document yields an empty Hash.
#
# Returns the parsed top-level Hash. Aborts with a validation message when
# the document's top level is anything other than a mapping, because the
# section loaders call `fetch` with string keys on the result.
def load_config
  parsed = YAML.safe_load(ALLOWLIST_PATH.read, permitted_classes: [], aliases: false) || {}
  return parsed if parsed.is_a?(Hash)

  abort "invalid allowlist: top-level document must be a mapping"
end
70
+
71
# Validates and returns the `ignore_paths` section of the parsed config.
#
# Each entry is a mapping with a `path` glob, a `reason`, and an OPTIONAL
# `rule`. Without `rule` the whole file is exempt from every check; with
# `rule` only that one check is exempt in the matching files, so the same
# pattern anywhere else still trips the lint.
#
# raw - the Hash returned by load_config.
#
# Returns the entries Array unchanged (an empty Array when the key is absent).
# Aborts when the section is not an Array, when an entry is not a mapping,
# when `path` or `reason` is missing/empty, when `path` is not a String, or
# when `rule` names an id that is not present in RULES.
def load_ignore_paths(raw)
  entries = raw.fetch("ignore_paths", [])
  unless entries.is_a?(Array)
    abort "invalid allowlist: top-level 'ignore_paths' must be an array"
  end

  entries.each_with_index do |entry, idx|
    label = "invalid ignore_paths entry ##{idx + 1}"
    # Guard first: indexing a nil/Integer entry with a String key would raise.
    abort "#{label}: must be a mapping" unless entry.is_a?(Hash)

    %w[path reason].each do |key|
      value = entry[key]
      if value.nil? || (value.respond_to?(:empty?) && value.empty?)
        abort "#{label}: missing #{key}"
      end
    end
    # The path is later handed to File.fnmatch?, which only accepts strings.
    abort "#{label}: path must be a string" unless entry["path"].is_a?(String)

    rule = entry["rule"]
    if rule && RULES.none? { |r| r.id == rule }
      abort "#{label}: unknown rule '#{rule}'"
    end
  end
  entries
end
96
+
97
# Tests a repository-relative path against an allowlist glob.
#
# The pattern is tried twice with File.fnmatch?: once with FNM_PATHNAME only
# and once with FNM_PATHNAME | FNM_EXTGLOB (which enables `{a,b}` braces).
# A hit under either flag set counts as a match.
#
# NOTE(review): under FNM_PATHNAME a trailing `**` that is not followed by
# `/` appears to behave like a single `*` - confirm whether files in nested
# sub-directories are expected to match patterns such as `dir/**`.
#
# Returns true or false.
def path_matches?(pattern, path)
  flag_sets = [File::FNM_PATHNAME, File::FNM_PATHNAME | File::FNM_EXTGLOB]
  flag_sets.any? { |flags| File.fnmatch?(pattern, path, flags) }
end
101
+
102
# Whole-file ignore: true when some entry WITHOUT a `rule` key has a `path`
# glob matching the given path. Entries that carry a `rule` are skipped here.
def fully_ignored?(path, ignores)
  whole_file_entries = ignores.select { |entry| entry["rule"].nil? }
  whole_file_entries.any? { |entry| path_matches?(entry["path"], path) }
end
106
+
107
# Per-(path, rule) ignore: true when some entry names exactly this rule id
# and its `path` glob matches the given path. Whole-file entries (no `rule`)
# never match here unless rule_id itself is nil.
def rule_ignored?(path, rule_id, ignores)
  ignores.each do |entry|
    next unless entry["rule"] == rule_id
    return true if path_matches?(entry["path"], path)
  end
  false
end
111
+
112
# Lists the C sources and headers to scan.
#
# Globs `ext/makiri/**/*.{c,h}` under ROOT, sorts the hits, and drops every
# file whose ROOT-relative path is whole-file ignored.
#
# Returns an Array of path strings as produced by Dir.glob.
def target_files(ignores)
  pattern = ROOT.join("ext/makiri/**/*.{c,h}").to_s
  candidates = Dir.glob(pattern).sort
  candidates.select do |candidate|
    relative = Pathname.new(candidate).relative_path_from(ROOT).to_s
    !fully_ignored?(relative, ignores)
  end
end
118
+
119
# Decides whether a source line should be scanned by the rules.
#
# Blank lines and lines that open with `//` or `/*` are skipped. A leading
# `*` is treated as a block-comment continuation only when it stands alone,
# closes the comment (`*/`), or is followed by whitespace (`* text`). Any
# other leading `*` - e.g. a dereference such as `*out = xmalloc(n);` or
# `**pp = ...` - is code and must be scanned, otherwise such statements
# would bypass every rule.
#
# This is a per-line heuristic with no block-comment state: a continuation
# expression that starts with `* ` is still skipped.
#
# line - one raw line of the file (trailing newline allowed).
#
# Returns true when the line should be scanned, false otherwise.
def code_line?(line)
  stripped = line.strip
  return false if stripped.empty?
  return false if stripped.start_with?("//", "/*")
  return false if stripped.match?(%r{\A\*(?:\z|/|\s)})

  true
end
126
+
127
# Runs every rule over every scannable line of every target file.
#
# For each file from target_files, lines are numbered from 1; lines rejected
# by code_line? are skipped. A Finding is recorded for each rule whose regex
# matches the line, unless that (path, rule) pair is ignored.
#
# Returns an Array of Finding, ordered by file, then line, then RULES order.
def scan_findings(ignores)
  collected = []
  target_files(ignores).each do |source_file|
    relative = Pathname.new(source_file).relative_path_from(ROOT).to_s
    File.foreach(source_file).with_index(1) do |text, number|
      next unless code_line?(text)

      RULES.each do |check|
        next unless text.match?(check.regex)
        next if rule_ignored?(relative, check.id, ignores)

        collected << Finding.new(path: relative, line: number, rule: check.id, text: text.strip)
      end
    end
  end
  collected
end
142
+
143
# Validates and returns the `allowlist` section of the parsed config.
#
# Each entry is a mapping with `path`, `rule`, `max` and `reason`; `max` is
# the number of findings of that rule tolerated in that path.
#
# raw - the Hash returned by load_config.
#
# Returns the entries Array unchanged (an empty Array when the key is absent).
# Aborts when the section is not an Array, when an entry is not a mapping,
# when any required key is missing/empty, when `rule` is not the id of a
# rule in RULES (same check as ignore_paths, so a typo cannot leave a dead
# entry behind), or when `max` is not a positive Integer.
def load_allowlist(raw)
  entries = raw.fetch("allowlist", [])
  unless entries.is_a?(Array)
    abort "invalid allowlist: top-level 'allowlist' must be an array"
  end

  entries.each_with_index do |entry, idx|
    label = "invalid allowlist entry ##{idx + 1}"
    # Guard first: indexing a nil/Integer entry with a String key would raise.
    abort "#{label}: must be a mapping" unless entry.is_a?(Hash)

    %w[path rule max reason].each do |key|
      value = entry[key]
      if value.nil? || (value.respond_to?(:empty?) && value.empty?)
        abort "#{label}: missing #{key}"
      end
    end
    if RULES.none? { |r| r.id == entry["rule"] }
      abort "#{label}: unknown rule '#{entry["rule"]}'"
    end
    unless entry["max"].is_a?(Integer) && entry["max"].positive?
      abort "#{label}: max must be a positive integer"
    end
  end
  entries
end
162
+
163
# Sums the `max` budgets of allowlist entries per [path, rule] pair.
#
# Returns a Hash keyed by [path, rule] whose default value is 0, so pairs
# with no allowlist entry read as a budget of zero.
def allowed_counts(entries)
  budgets = Hash.new(0)
  entries.each do |entry|
    budgets[entry.values_at("path", "rule")] += entry["max"]
  end
  budgets
end
169
+
170
# Builds the [path, rule] pair used to look a finding up in the allowlist
# budgets and to count how many times that pair has been seen.
def finding_key(finding)
  %i[path rule].map { |field| finding.public_send(field) }
end
173
+
174
# Prints a starter allowlist document for the current findings to stdout.
#
# The `ignore_paths` section is a fixed, hard-coded entry for
# `ext/makiri/core/**`; it does not echo whatever ignore_paths the loaded
# config contains. The `allowlist` section gets one entry per [path, rule]
# pair, sorted, with `max` set to the number of findings in that pair.
#
# Nested keys are indented so the output parses as YAML, and an empty
# findings list is written as `allowlist: []` rather than a bare
# `allowlist:` (a null value, which load_allowlist rejects as "must be an
# array").
#
# findings - Array of objects responding to `path` and `rule`.
def dump_baseline(findings)
  puts "ignore_paths:"
  puts "  - path: ext/makiri/core/**"
  puts "    reason: Safe allocation and buffer helper internals intentionally contain primitive allocation patterns."
  puts ""

  grouped = findings.group_by { |f| [f.path, f.rule] }.sort
  if grouped.empty?
    puts "allowlist: []"
    return
  end

  puts "allowlist:"
  grouped.each do |(path, rule), group|
    puts "  - path: #{path}"
    puts "    rule: #{rule}"
    puts "    max: #{group.length}"
    puts "    reason: Baseline existing occurrence; remove as the C safety refactor replaces it."
  end
end
188
+
189
# --- Command-line entry point -------------------------------------------
# Flags:
#   --dump-baseline                    print an allowlist for the current tree and exit 0
#   --no-allowlist / --ignore-allowlist  report every finding, ignoring allowlist budgets
options = { dump_baseline: false, no_allowlist: false }
OptionParser.new do |opts|
  opts.on("--dump-baseline", "Print an allowlist for the current tree") do
    options[:dump_baseline] = true
  end
  opts.on("--no-allowlist", "--ignore-allowlist", "Report every finding without applying the allowlist") do
    options[:no_allowlist] = true
  end
end.parse!

config = load_config
ignores = load_ignore_paths(config)
findings = scan_findings(ignores)

if options[:dump_baseline]
  dump_baseline(findings)
  exit 0
end

# With the allowlist disabled every [path, rule] budget reads as 0.
allow_counts = options[:no_allowlist] ? Hash.new(0) : allowed_counts(load_allowlist(config))

# Findings are consumed in scan order; the first `max` hits of each
# [path, rule] pair are tolerated and every later hit is a violation.
seen = Hash.new(0)
violations = findings.select do |finding|
  key = finding_key(finding)
  seen[key] += 1
  seen[key] > allow_counts[key]
end

if violations.empty?
  puts "C safety lint passed (#{findings.length} checked finding(s), all allowlisted)."
  exit 0
end

if options[:no_allowlist]
  warn "C safety lint failed: #{violations.length} finding(s) with allowlist disabled"
else
  warn "C safety lint failed: #{violations.length} unallowlisted finding(s)"
end
violations.each do |finding|
  rule = RULES.find { |r| r.id == finding.rule }
  warn "#{finding.path}:#{finding.line}: #{finding.rule}: #{rule&.message}"
  warn " #{finding.text}"
end
# Kernel#warn called with no arguments writes nothing, so pass an empty
# string to actually emit the blank separator line before the hint.
warn ""
warn "If this is intentionally safe, add a narrow entry with a reason to #{ALLOWLIST_PATH.relative_path_from(ROOT)}."
exit 1
@@ -0,0 +1,12 @@
1
+ ignore_paths:
2
+ - path: ext/makiri/core/**
3
+ reason: The Ruby-free safe-core primitives layer; it deliberately holds the raw allocation/length patterns that every other layer routes through.
4
+ # --- the Ruby boundary (bridge/): raw Ruby String access and mkr_verified_text_t
5
+ # minting are this layer's defining purpose, exempt per (file, rule) so the
6
+ # same patterns anywhere else still trip the lint ---
7
+ - path: ext/makiri/bridge/ruby_string.c
8
+ rule: ruby_string_ptr
9
+ reason: "Ruby string boundary helpers only: strict text validation, raw HTML byte views, engine-string validation, and fail-closed exception-message copying. Raw RSTRING access is this file's purpose; it never escapes to the engine."
10
+ - path: ext/makiri/bridge/text_token.c
11
+ rule: verified_text_forge
12
+ reason: mkr_verified_text_from_view, the sole sanctioned mint of mkr_verified_text_t; its input view is already validated by the bridge string helpers.
data/sig/makiri.rbs ADDED
@@ -0,0 +1,4 @@
1
+ module Makiri
2
+ VERSION: String
3
+ # See the writing guide of rbs: https://github.com/ruby/rbs#guides
4
+ end
metadata ADDED
@@ -0,0 +1,127 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: makiri
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: aarch64-linux
6
+ authors:
7
+ - takahashim
8
+ bindir: exe
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: rake
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - "~>"
17
+ - !ruby/object:Gem::Version
18
+ version: '13.0'
19
+ type: :development
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - "~>"
24
+ - !ruby/object:Gem::Version
25
+ version: '13.0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: rake-compiler
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '1.2'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.2'
40
+ - !ruby/object:Gem::Dependency
41
+ name: rspec
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '3.13'
47
+ type: :development
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '3.13'
54
+ description: |
55
+ Makiri parses HTML5 documents via the Lexbor library
56
+ and queries them with a native XPath 1.0 engine written for this project.
57
+ It does not depend on libxml2 at any layer. The API is
58
+ Nokogiri-compatible for the subset of methods used in HTML scraping.
59
+ email:
60
+ - takahashimm@gmail.com
61
+ executables: []
62
+ extensions: []
63
+ extra_rdoc_files: []
64
+ files:
65
+ - ".github/workflows/ci.yml"
66
+ - ".github/workflows/conformance.yml"
67
+ - ".github/workflows/release.yml"
68
+ - ".github/workflows/security.yml"
69
+ - ".gitmodules"
70
+ - CHANGELOG.md
71
+ - LICENSE
72
+ - NOTICE
73
+ - README.md
74
+ - Rakefile
75
+ - lib/makiri.rb
76
+ - lib/makiri/3.2/makiri.so
77
+ - lib/makiri/3.3/makiri.so
78
+ - lib/makiri/3.4/makiri.so
79
+ - lib/makiri/4.0/makiri.so
80
+ - lib/makiri/attribute.rb
81
+ - lib/makiri/cdata.rb
82
+ - lib/makiri/comment.rb
83
+ - lib/makiri/css.rb
84
+ - lib/makiri/document.rb
85
+ - lib/makiri/document_fragment.rb
86
+ - lib/makiri/document_type.rb
87
+ - lib/makiri/element.rb
88
+ - lib/makiri/node.rb
89
+ - lib/makiri/node_set.rb
90
+ - lib/makiri/processing_instruction.rb
91
+ - lib/makiri/text.rb
92
+ - lib/makiri/version.rb
93
+ - lib/makiri/xpath.rb
94
+ - lib/makiri/xpath_context.rb
95
+ - script/build_native_gem.rb
96
+ - script/check_c_safety.rb
97
+ - script/check_c_safety_allowlist.yml
98
+ - sig/makiri.rbs
99
+ homepage: https://github.com/takahashim/makiri
100
+ licenses:
101
+ - Apache-2.0
102
+ metadata:
103
+ homepage_uri: https://github.com/takahashim/makiri
104
+ bug_tracker_uri: https://github.com/takahashim/makiri/issues
105
+ changelog_uri: https://github.com/takahashim/makiri/blob/main/CHANGELOG.md
106
+ rubygems_mfa_required: 'true'
107
+ rdoc_options: []
108
+ require_paths:
109
+ - lib
110
+ required_ruby_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: 3.2.0
115
+ - - "<"
116
+ - !ruby/object:Gem::Version
117
+ version: 4.1.dev
118
+ required_rubygems_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubygems_version: 3.6.9
125
+ specification_version: 4
126
+ summary: HTML5 parser + native XPath 1.0 for Ruby, with no libxml2 dependency.
127
+ test_files: []