suma 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +6 -23
- data/README.adoc +0 -115
- data/lib/suma/cli/validate.rb +0 -14
- data/lib/suma/cli.rb +1 -0
- data/lib/suma/version.rb +1 -1
- metadata +2 -3
- data/lib/suma/cli/validate_ascii.rb +0 -551
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e902246136c35ed32e6e71e792164b0582ee3612ba0e9bdf185c84f1000a5f40
|
|
4
|
+
data.tar.gz: b2793475b6863c62f791aee2b6fa2851e6e6e8b04155271bacf8eaa92f879749
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e0854a1e32d238a03ee0887b1de2c11c525d5be4e621cee77b94213b7fd10d1de9d46b5ec5eb1d6858ca822bebd04d3c26dc2fde3c97019d057bf83d70d27607
|
|
7
|
+
data.tar.gz: abef015ecde9b9141ea5eb67d16cdfdbbb3c0d4560daecea23e9ebc9ecbb50f45bf1377c2c8d674aa15bb21fce3fe00c7628bb413325d11488174572734b80c8
|
data/.rubocop_todo.yml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on 2025-
|
|
3
|
+
# on 2025-12-11 03:58:36 UTC using RuboCop version 1.81.1.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
@@ -18,21 +18,13 @@ Gemspec/RequiredRubyVersion:
|
|
|
18
18
|
Exclude:
|
|
19
19
|
- 'suma.gemspec'
|
|
20
20
|
|
|
21
|
-
# Offense count:
|
|
21
|
+
# Offense count: 81
|
|
22
22
|
# This cop supports safe autocorrection (--autocorrect).
|
|
23
23
|
# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, IgnoreCopDirectives, AllowedPatterns, SplitStrings.
|
|
24
24
|
# URISchemes: http, https
|
|
25
25
|
Layout/LineLength:
|
|
26
26
|
Enabled: false
|
|
27
27
|
|
|
28
|
-
# Offense count: 1
|
|
29
|
-
# This cop supports safe autocorrection (--autocorrect).
|
|
30
|
-
# Configuration parameters: EnforcedStyle.
|
|
31
|
-
# SupportedStyles: final_newline, final_blank_line
|
|
32
|
-
Layout/TrailingEmptyLines:
|
|
33
|
-
Exclude:
|
|
34
|
-
- 'Gemfile'
|
|
35
|
-
|
|
36
28
|
# Offense count: 2
|
|
37
29
|
# Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch.
|
|
38
30
|
Lint/DuplicateBranch:
|
|
@@ -91,14 +83,13 @@ Naming/PredicateMethod:
|
|
|
91
83
|
Exclude:
|
|
92
84
|
- 'lib/suma/eengine/wrapper.rb'
|
|
93
85
|
|
|
94
|
-
# Offense count:
|
|
86
|
+
# Offense count: 2
|
|
95
87
|
# Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
|
|
96
88
|
# SupportedStyles: snake_case, normalcase, non_integer
|
|
97
89
|
# AllowedIdentifiers: TLS1_1, TLS1_2, capture3, iso8601, rfc1123_date, rfc822, rfc2822, rfc3339, x86_64
|
|
98
90
|
Naming/VariableNumber:
|
|
99
91
|
Exclude:
|
|
100
92
|
- 'spec/suma/cli/export_spec.rb'
|
|
101
|
-
- 'spec/suma/cli/validate_ascii_spec.rb'
|
|
102
93
|
|
|
103
94
|
# Offense count: 3
|
|
104
95
|
# Configuration parameters: MinSize.
|
|
@@ -115,7 +106,7 @@ RSpec/ContextWording:
|
|
|
115
106
|
Exclude:
|
|
116
107
|
- 'spec/suma/cli/export_spec.rb'
|
|
117
108
|
|
|
118
|
-
# Offense count:
|
|
109
|
+
# Offense count: 48
|
|
119
110
|
# Configuration parameters: CountAsOne.
|
|
120
111
|
RSpec/ExampleLength:
|
|
121
112
|
Max: 61
|
|
@@ -133,7 +124,7 @@ RSpec/InstanceVariable:
|
|
|
133
124
|
Exclude:
|
|
134
125
|
- 'spec/suma/cli/compare_spec.rb'
|
|
135
126
|
|
|
136
|
-
# Offense count:
|
|
127
|
+
# Offense count: 1
|
|
137
128
|
# This cop supports safe autocorrection (--autocorrect).
|
|
138
129
|
RSpec/IteratedExpectation:
|
|
139
130
|
Exclude:
|
|
@@ -146,7 +137,7 @@ RSpec/MessageSpies:
|
|
|
146
137
|
Exclude:
|
|
147
138
|
- 'spec/suma/cli/compare_spec.rb'
|
|
148
139
|
|
|
149
|
-
# Offense count:
|
|
140
|
+
# Offense count: 43
|
|
150
141
|
RSpec/MultipleExpectations:
|
|
151
142
|
Max: 23
|
|
152
143
|
|
|
@@ -161,11 +152,3 @@ RSpec/SpecFilePathFormat:
|
|
|
161
152
|
RSpec/StubbedMock:
|
|
162
153
|
Exclude:
|
|
163
154
|
- 'spec/suma/cli/compare_spec.rb'
|
|
164
|
-
|
|
165
|
-
# Offense count: 1
|
|
166
|
-
# This cop supports unsafe autocorrection (--autocorrect-all).
|
|
167
|
-
# Configuration parameters: AllowedReceivers.
|
|
168
|
-
# AllowedReceivers: Thread.current
|
|
169
|
-
Style/HashEachMethods:
|
|
170
|
-
Exclude:
|
|
171
|
-
- 'spec/suma/cli/compare_spec.rb'
|
data/README.adoc
CHANGED
|
@@ -140,7 +140,6 @@ $ suma validate SUBCOMMAND [options]
|
|
|
140
140
|
Subcommands:
|
|
141
141
|
|
|
142
142
|
`links`:: Validate EXPRESS links
|
|
143
|
-
`ascii`:: Check for non-ASCII characters in EXPRESS files
|
|
144
143
|
|
|
145
144
|
==== Links subcommand
|
|
146
145
|
|
|
@@ -176,120 +175,6 @@ This command:
|
|
|
176
175
|
* Writes validation results to the `OUTPUT_FILE`
|
|
177
176
|
* Provides progress bars to track schema loading and link validation
|
|
178
177
|
|
|
179
|
-
==== ASCII subcommand
|
|
180
|
-
|
|
181
|
-
The `ascii` subcommand detects non-ASCII characters in EXPRESS files and reports on those exact lines, providing replacement suggestions.
|
|
182
|
-
|
|
183
|
-
[source,sh]
|
|
184
|
-
----
|
|
185
|
-
$ suma validate ascii EXPRESS_FILE_PATH [options]
|
|
186
|
-
----
|
|
187
|
-
|
|
188
|
-
Parameters:
|
|
189
|
-
|
|
190
|
-
`EXPRESS_FILE_PATH`:: Path to an EXPRESS file or a folder containing EXPRESS
|
|
191
|
-
files
|
|
192
|
-
|
|
193
|
-
Options:
|
|
194
|
-
|
|
195
|
-
`--[no-]recursive`, `-r`:: Select EXPRESS files recursively based on the specified
|
|
196
|
-
folder path (default: false)
|
|
197
|
-
`--[no-]yaml`, `-y`:: Output results in YAML format for machine processing (default: false)
|
|
198
|
-
|
|
199
|
-
[example]
|
|
200
|
-
====
|
|
201
|
-
.To validate all EXPRESS files in a specific directory recursively
|
|
202
|
-
[source,sh]
|
|
203
|
-
----
|
|
204
|
-
$ bundle exec suma validate ascii ../iso-10303/schemas -r
|
|
205
|
-
----
|
|
206
|
-
|
|
207
|
-
.To validate and output results in YAML format
|
|
208
|
-
[source,sh]
|
|
209
|
-
----
|
|
210
|
-
$ bundle exec suma validate ascii ../iso-10303/schemas -r -y > validation.yml
|
|
211
|
-
----
|
|
212
|
-
====
|
|
213
|
-
|
|
214
|
-
This command:
|
|
215
|
-
|
|
216
|
-
* Loads the EXPRESS files specified in the `EXPRESS_FILE_PATH`
|
|
217
|
-
* Scans each line for non-ASCII characters
|
|
218
|
-
* Reports detailed information about each violation, including:
|
|
219
|
-
** Filename and line number
|
|
220
|
-
** The exact line content
|
|
221
|
-
** Visual indication of the non-ASCII sequence location
|
|
222
|
-
** Character details with hexadecimal representation
|
|
223
|
-
* Provides specific replacement suggestions:
|
|
224
|
-
** For math symbols: provides equivalent AsciiMath notation
|
|
225
|
-
** For other non-ASCII characters: provides ISO 10303-11 encoded string literal format
|
|
226
|
-
* Displays a summary table showing:
|
|
227
|
-
** File path (directory/filename)
|
|
228
|
-
** Each non-ASCII symbol found
|
|
229
|
-
** Suggested replacement for each symbol
|
|
230
|
-
** Number of occurrences of each character
|
|
231
|
-
** Totals row showing unique character count and overall occurrences
|
|
232
|
-
* Summarizes findings across all scanned files
|
|
233
|
-
* Optionally outputs structured data in YAML format with detailed occurrence information
|
|
234
|
-
|
|
235
|
-
Human-readable output format example:
|
|
236
|
-
|
|
237
|
-
[source,text]
|
|
238
|
-
----
|
|
239
|
-
/path/to/file.exp:
|
|
240
|
-
Line 42, Column 15:
|
|
241
|
-
ENTITY some_entity (name: STRING, description: "résumé");
|
|
242
|
-
^^^^^
|
|
243
|
-
"é" - Hex: 0xe9, UTF-8 bytes: 0xc3 0xa9
|
|
244
|
-
Replacement: ISO 10303-11: "000000E9"
|
|
245
|
-
|
|
246
|
-
"s" - Hex: 0x73, UTF-8 bytes: 0x73
|
|
247
|
-
|
|
248
|
-
"u" - Hex: 0x75, UTF-8 bytes: 0x75
|
|
249
|
-
|
|
250
|
-
"m" - Hex: 0x6d, UTF-8 bytes: 0x6d
|
|
251
|
-
|
|
252
|
-
"é" - Hex: 0xe9, UTF-8 bytes: 0xc3 0xa9
|
|
253
|
-
Replacement: ISO 10303-11: "000000E9"
|
|
254
|
-
|
|
255
|
-
Found 1 non-ASCII sequence(s) in file.exp
|
|
256
|
-
|
|
257
|
-
Summary:
|
|
258
|
-
Scanned 3 EXPRESS file(s)
|
|
259
|
-
Found 1 non-ASCII sequence(s) in 1 file(s)
|
|
260
|
-
|
|
261
|
-
+------------------+--------------------+-----------------------------+-------------+
|
|
262
|
-
| File | Symbol | Replacement | Occurrences |
|
|
263
|
-
+------------------+--------------------+-----------------------------+-------------+
|
|
264
|
-
| path/to/file.exp | "é" (0xe9) | ISO 10303-11: "000000E9" | 2 |
|
|
265
|
-
+------------------+--------------------+-----------------------------+-------------+
|
|
266
|
-
| TOTAL | 1 unique | | 2 |
|
|
267
|
-
+------------------+--------------------+-----------------------------+-------------+
|
|
268
|
-
----
|
|
269
|
-
|
|
270
|
-
===== Japanese Character Example
|
|
271
|
-
|
|
272
|
-
For Japanese characters like 神戸 (Kobe), the command will provide ISO 10303-11 encoded string literal replacements:
|
|
273
|
-
|
|
274
|
-
[source,text]
|
|
275
|
-
----
|
|
276
|
-
"神" - Hex: 0x795e, UTF-8 bytes: 0xe7 0xa5 0x9e
|
|
277
|
-
Replacement: ISO 10303-11: "0000795E"
|
|
278
|
-
|
|
279
|
-
"戸" - Hex: 0x6238, UTF-8 bytes: 0xe6 0x88 0xb8
|
|
280
|
-
Replacement: ISO 10303-11: "00006238"
|
|
281
|
-
----
|
|
282
|
-
|
|
283
|
-
===== Math Symbol Example
|
|
284
|
-
|
|
285
|
-
For mathematical symbols, the command will provide equivalent AsciiMath notation:
|
|
286
|
-
|
|
287
|
-
[source,text]
|
|
288
|
-
----
|
|
289
|
-
"×" - Hex: 0xd7, UTF-8 bytes: 0xc3 0x97
|
|
290
|
-
Replacement: AsciiMath: xx
|
|
291
|
-
----
|
|
292
|
-
|
|
293
178
|
|
|
294
179
|
=== Generate schemas command
|
|
295
180
|
|
data/lib/suma/cli/validate.rb
CHANGED
|
@@ -15,20 +15,6 @@ module Suma
|
|
|
15
15
|
links = Cli::ValidateLinks.new
|
|
16
16
|
links.extract_and_validate(*args)
|
|
17
17
|
end
|
|
18
|
-
|
|
19
|
-
desc "ascii EXPRESS_FILE_PATH",
|
|
20
|
-
"Validate EXPRESS files for ASCII-only content"
|
|
21
|
-
option :recursive, type: :boolean, default: false, aliases: "-r",
|
|
22
|
-
desc: "Validate EXPRESS files under the specified path recursively"
|
|
23
|
-
option :yaml, type: :boolean, default: false, aliases: "-y",
|
|
24
|
-
desc: "Output results in YAML format"
|
|
25
|
-
def ascii(express_file_path)
|
|
26
|
-
require_relative "validate_ascii"
|
|
27
|
-
|
|
28
|
-
validator = Cli::ValidateAscii.new
|
|
29
|
-
validator.options = options
|
|
30
|
-
validator.validate_ascii(express_file_path)
|
|
31
|
-
end
|
|
32
18
|
end
|
|
33
19
|
end
|
|
34
20
|
end
|
data/lib/suma/cli.rb
CHANGED
data/lib/suma/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: suma
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.2.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-11
|
|
11
|
+
date: 2025-12-11 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: expressir
|
|
@@ -175,7 +175,6 @@ files:
|
|
|
175
175
|
- lib/suma/cli/generate_schemas.rb
|
|
176
176
|
- lib/suma/cli/reformat.rb
|
|
177
177
|
- lib/suma/cli/validate.rb
|
|
178
|
-
- lib/suma/cli/validate_ascii.rb
|
|
179
178
|
- lib/suma/cli/validate_links.rb
|
|
180
179
|
- lib/suma/collection_config.rb
|
|
181
180
|
- lib/suma/collection_manifest.rb
|
|
data/lib/suma/cli/validate_ascii.rb
DELETED
|
@@ -1,551 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "thor"
|
|
4
|
-
require "yaml"
|
|
5
|
-
require "paint"
|
|
6
|
-
require "plurimath"
|
|
7
|
-
require "set" # For using Set in unique_character_count
|
|
8
|
-
require "table_tennis"
|
|
9
|
-
require_relative "../thor_ext"
|
|
10
|
-
|
|
11
|
-
module Suma
|
|
12
|
-
module Cli
|
|
13
|
-
# Represents a non-ASCII character with its details and replacement
|
|
14
|
-
class NonAsciiCharacter
|
|
15
|
-
attr_reader :char, :hex, :utf8, :is_math, :replacement,
|
|
16
|
-
:replacement_type, :occurrences
|
|
17
|
-
|
|
18
|
-
def initialize(char, hex, utf8, is_math, replacement, replacement_type)
|
|
19
|
-
@char = char
|
|
20
|
-
@hex = hex
|
|
21
|
-
@utf8 = utf8
|
|
22
|
-
@is_math = is_math
|
|
23
|
-
@replacement = replacement
|
|
24
|
-
@replacement_type = replacement_type
|
|
25
|
-
@occurrences = []
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
def add_occurrence(line_number, column, line)
|
|
29
|
-
@occurrences << {
|
|
30
|
-
line_number: line_number,
|
|
31
|
-
column: column,
|
|
32
|
-
line: line,
|
|
33
|
-
}
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
def replacement_text
|
|
37
|
-
@is_math ? "AsciiMath: #{@replacement}" : "ISO 10303-11: #{@replacement}"
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
def occurrence_count
|
|
41
|
-
@occurrences.size
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
def to_h
|
|
45
|
-
{
|
|
46
|
-
character: @char,
|
|
47
|
-
hex: @hex,
|
|
48
|
-
utf8: @utf8,
|
|
49
|
-
is_math: @is_math,
|
|
50
|
-
replacement_type: @replacement_type,
|
|
51
|
-
replacement: @replacement,
|
|
52
|
-
occurrence_count: occurrence_count,
|
|
53
|
-
occurrences: @occurrences,
|
|
54
|
-
}
|
|
55
|
-
end
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
# Represents all non-ASCII characters in a file
|
|
59
|
-
class FileViolations
|
|
60
|
-
attr_reader :path, :filename, :directory, :violations, :unique_characters
|
|
61
|
-
|
|
62
|
-
def initialize(file_path)
|
|
63
|
-
@path = file_path
|
|
64
|
-
@filename = File.basename(file_path)
|
|
65
|
-
@directory = File.dirname(file_path)
|
|
66
|
-
@characters = {} # Map of characters to NonAsciiCharacter objects
|
|
67
|
-
@violations = [] # List of violations (line, column, etc.)
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
def add_violation(line_number, column, match, char_details, line)
|
|
71
|
-
violation = {
|
|
72
|
-
line_number: line_number,
|
|
73
|
-
column: column,
|
|
74
|
-
match: match,
|
|
75
|
-
char_details: char_details,
|
|
76
|
-
line: line,
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
@violations << violation
|
|
80
|
-
|
|
81
|
-
# Register each character
|
|
82
|
-
char_details.each do |detail|
|
|
83
|
-
char = detail[:char]
|
|
84
|
-
unless @characters[char]
|
|
85
|
-
@characters[char] = NonAsciiCharacter.new(
|
|
86
|
-
char,
|
|
87
|
-
detail[:hex],
|
|
88
|
-
detail[:utf8],
|
|
89
|
-
detail[:is_math],
|
|
90
|
-
detail[:replacement],
|
|
91
|
-
detail[:replacement_type],
|
|
92
|
-
)
|
|
93
|
-
end
|
|
94
|
-
|
|
95
|
-
@characters[char].add_occurrence(line_number, column, line)
|
|
96
|
-
end
|
|
97
|
-
end
|
|
98
|
-
|
|
99
|
-
def violation_count
|
|
100
|
-
@violations.size
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
def unique_characters
|
|
104
|
-
@characters.values
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
def display_path
|
|
108
|
-
"#{File.basename(@directory)}/#{@filename}"
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
def full_path
|
|
112
|
-
File.expand_path(@path)
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
def to_h
|
|
116
|
-
{
|
|
117
|
-
file: display_path,
|
|
118
|
-
count: violation_count,
|
|
119
|
-
non_ascii_characters: unique_characters.map(&:to_h),
|
|
120
|
-
}
|
|
121
|
-
end
|
|
122
|
-
end
|
|
123
|
-
|
|
124
|
-
# Collection of all violations across multiple files
|
|
125
|
-
class NonAsciiViolationCollection
|
|
126
|
-
attr_reader :file_violations, :total_files
|
|
127
|
-
|
|
128
|
-
def initialize
|
|
129
|
-
@file_violations = {} # Map of file paths to FileViolations objects
|
|
130
|
-
@total_files = 0
|
|
131
|
-
@unicode_to_asciimath = nil
|
|
132
|
-
end
|
|
133
|
-
|
|
134
|
-
def process_file(file)
|
|
135
|
-
@total_files += 1
|
|
136
|
-
|
|
137
|
-
# Initialize the mapping once
|
|
138
|
-
@unicode_to_asciimath ||= build_unicode_to_asciimath_map
|
|
139
|
-
|
|
140
|
-
file_violations = process_file_violations(file)
|
|
141
|
-
return if file_violations.violations.empty?
|
|
142
|
-
|
|
143
|
-
@file_violations[file] = file_violations
|
|
144
|
-
end
|
|
145
|
-
|
|
146
|
-
def files_with_violations
|
|
147
|
-
@file_violations.size
|
|
148
|
-
end
|
|
149
|
-
|
|
150
|
-
def total_violations
|
|
151
|
-
@file_violations.values.sum(&:violation_count)
|
|
152
|
-
end
|
|
153
|
-
|
|
154
|
-
def unique_character_count
|
|
155
|
-
# Get total unique characters across all files
|
|
156
|
-
all_chars = Set.new
|
|
157
|
-
@file_violations.each_value do |file_violation|
|
|
158
|
-
file_violation.unique_characters.each do |char|
|
|
159
|
-
all_chars.add(char.char)
|
|
160
|
-
end
|
|
161
|
-
end
|
|
162
|
-
all_chars.size
|
|
163
|
-
end
|
|
164
|
-
|
|
165
|
-
def total_occurrence_count
|
|
166
|
-
# Sum all occurrences of all characters across all files
|
|
167
|
-
@file_violations.values.sum do |file_violation|
|
|
168
|
-
file_violation.unique_characters.sum(&:occurrence_count)
|
|
169
|
-
end
|
|
170
|
-
end
|
|
171
|
-
|
|
172
|
-
def to_yaml_data
|
|
173
|
-
{
|
|
174
|
-
summary: {
|
|
175
|
-
total_files: @total_files,
|
|
176
|
-
files_with_violations: files_with_violations,
|
|
177
|
-
total_violations: total_violations,
|
|
178
|
-
total_unique_characters: unique_character_count,
|
|
179
|
-
total_occurrences: total_occurrence_count,
|
|
180
|
-
},
|
|
181
|
-
violations: @file_violations.transform_keys do |k|
|
|
182
|
-
File.expand_path(k)
|
|
183
|
-
end.transform_values(&:to_h),
|
|
184
|
-
}
|
|
185
|
-
end
|
|
186
|
-
|
|
187
|
-
def print_text_output
|
|
188
|
-
return if @file_violations.empty?
|
|
189
|
-
|
|
190
|
-
# Print each file's violations
|
|
191
|
-
@file_violations.each_value do |file_violation|
|
|
192
|
-
puts "\n#{Paint[file_violation.display_path, :cyan, :bold]}:"
|
|
193
|
-
|
|
194
|
-
file_violation.violations.each do |v|
|
|
195
|
-
puts " #{Paint['Line',
|
|
196
|
-
:blue]} #{Paint[v[:line_number],
|
|
197
|
-
:yellow]}, #{Paint['Column',
|
|
198
|
-
:blue]} #{Paint[v[:column],
|
|
199
|
-
:yellow]}:"
|
|
200
|
-
puts " #{v[:line]}"
|
|
201
|
-
puts " #{' ' * v[:column]}#{Paint['^' * v[:match].length,
|
|
202
|
-
:red]} #{Paint['Non-ASCII sequence',
|
|
203
|
-
:red]}"
|
|
204
|
-
|
|
205
|
-
v[:char_details].each do |cd|
|
|
206
|
-
character = file_violation.unique_characters.find do |c|
|
|
207
|
-
c.char == cd[:char]
|
|
208
|
-
end
|
|
209
|
-
next unless character
|
|
210
|
-
|
|
211
|
-
puts " #{Paint["\"#{cd[:char]}\"",
|
|
212
|
-
:yellow]} - Hex: #{Paint[cd[:hex],
|
|
213
|
-
:magenta]}, UTF-8 bytes: #{Paint[cd[:utf8],
|
|
214
|
-
:magenta]}"
|
|
215
|
-
puts " #{Paint['Replacement:',
|
|
216
|
-
:green]} #{character.replacement_text}"
|
|
217
|
-
end
|
|
218
|
-
puts ""
|
|
219
|
-
end
|
|
220
|
-
|
|
221
|
-
puts " #{Paint['Found',
|
|
222
|
-
:green]} #{Paint[file_violation.violation_count,
|
|
223
|
-
:red]} #{Paint['non-ASCII sequence(s) in',
|
|
224
|
-
:green]} #{Paint[file_violation.filename,
|
|
225
|
-
:cyan]}\n"
|
|
226
|
-
end
|
|
227
|
-
|
|
228
|
-
# Print summary
|
|
229
|
-
puts "\n#{Paint['Summary:', :blue, :bold]}"
|
|
230
|
-
puts " #{Paint['Scanned',
|
|
231
|
-
:green]} #{Paint[@total_files,
|
|
232
|
-
:yellow]} #{Paint['EXPRESS file(s)',
|
|
233
|
-
:green]}"
|
|
234
|
-
puts " #{Paint['Found',
|
|
235
|
-
:green]} #{Paint[total_violations,
|
|
236
|
-
:red]} #{Paint['non-ASCII sequence(s) in',
|
|
237
|
-
:green]} #{Paint[files_with_violations,
|
|
238
|
-
:red]} #{Paint['file(s)',
|
|
239
|
-
:green]}"
|
|
240
|
-
end
|
|
241
|
-
|
|
242
|
-
def print_table_output
|
|
243
|
-
return if @file_violations.empty?
|
|
244
|
-
|
|
245
|
-
# Build rows array
|
|
246
|
-
rows = []
|
|
247
|
-
total_occurrences = 0
|
|
248
|
-
|
|
249
|
-
@file_violations.each_value do |file_violation|
|
|
250
|
-
file_violation.unique_characters.each do |character|
|
|
251
|
-
occurrence_count = character.occurrence_count
|
|
252
|
-
total_occurrences += occurrence_count
|
|
253
|
-
|
|
254
|
-
rows << {
|
|
255
|
-
file: file_violation.display_path,
|
|
256
|
-
symbol: "\"#{character.char}\" (#{character.hex})",
|
|
257
|
-
replacement: character.replacement_text,
|
|
258
|
-
occurrences: occurrence_count,
|
|
259
|
-
}
|
|
260
|
-
end
|
|
261
|
-
end
|
|
262
|
-
|
|
263
|
-
# Add total row
|
|
264
|
-
rows << {
|
|
265
|
-
file: "TOTAL",
|
|
266
|
-
symbol: "#{unique_character_count} unique",
|
|
267
|
-
replacement: "",
|
|
268
|
-
occurrences: total_occurrences,
|
|
269
|
-
}
|
|
270
|
-
|
|
271
|
-
# Use TableTennis to render
|
|
272
|
-
options = {
|
|
273
|
-
title: "Non-ASCII Characters Summary",
|
|
274
|
-
columns: %i[file symbol replacement occurrences],
|
|
275
|
-
headers: {
|
|
276
|
-
file: "File",
|
|
277
|
-
symbol: "Symbol",
|
|
278
|
-
replacement: "Replacement",
|
|
279
|
-
occurrences: "Occurrences",
|
|
280
|
-
},
|
|
281
|
-
mark: ->(row) { row[:file] == "TOTAL" },
|
|
282
|
-
}
|
|
283
|
-
|
|
284
|
-
puts "\n#{TableTennis.new(rows, options)}\n"
|
|
285
|
-
end
|
|
286
|
-
|
|
287
|
-
private
|
|
288
|
-
|
|
289
|
-
def process_file_violations(file)
|
|
290
|
-
file_violations = FileViolations.new(file)
|
|
291
|
-
|
|
292
|
-
# Process file line by line
|
|
293
|
-
File.readlines(file,
|
|
294
|
-
encoding: "UTF-8").each_with_index do |line, line_idx|
|
|
295
|
-
line_number = line_idx + 1
|
|
296
|
-
|
|
297
|
-
# Skip if line only contains ASCII
|
|
298
|
-
next unless /[^\x00-\x7F]/.match?(line)
|
|
299
|
-
|
|
300
|
-
# Find all non-ASCII sequences
|
|
301
|
-
line.chomp.scan(/([^\x00-\x7F]+)/) do |match|
|
|
302
|
-
match = match[0]
|
|
303
|
-
column = line.index(match)
|
|
304
|
-
|
|
305
|
-
# Process each character in the sequence
|
|
306
|
-
char_details = match.chars.filter_map do |c|
|
|
307
|
-
process_non_ascii_char(c)
|
|
308
|
-
end
|
|
309
|
-
|
|
310
|
-
# Skip if no non-ASCII characters found
|
|
311
|
-
next if char_details.empty?
|
|
312
|
-
|
|
313
|
-
file_violations.add_violation(line_number, column, match,
|
|
314
|
-
char_details, line.chomp)
|
|
315
|
-
end
|
|
316
|
-
end
|
|
317
|
-
|
|
318
|
-
file_violations
|
|
319
|
-
end
|
|
320
|
-
|
|
321
|
-
def process_non_ascii_char(char)
|
|
322
|
-
# Skip ASCII characters
|
|
323
|
-
return nil if char.ord <= 0x7F
|
|
324
|
-
|
|
325
|
-
code_point = char.ord
|
|
326
|
-
hex = "0x#{code_point.to_s(16)}"
|
|
327
|
-
utf8 = code_point.chr(Encoding::UTF_8).bytes.map do |b|
|
|
328
|
-
"0x#{b.to_s(16)}"
|
|
329
|
-
end.join(" ")
|
|
330
|
-
|
|
331
|
-
# Check if it's a math symbol
|
|
332
|
-
if asciimath = @unicode_to_asciimath[char]
|
|
333
|
-
return {
|
|
334
|
-
char: char,
|
|
335
|
-
hex: hex,
|
|
336
|
-
utf8: utf8,
|
|
337
|
-
is_math: true,
|
|
338
|
-
replacement: asciimath,
|
|
339
|
-
replacement_type: "asciimath",
|
|
340
|
-
}
|
|
341
|
-
end
|
|
342
|
-
|
|
343
|
-
# Not a math symbol, use ISO encoding
|
|
344
|
-
{
|
|
345
|
-
char: char,
|
|
346
|
-
hex: hex,
|
|
347
|
-
utf8: utf8,
|
|
348
|
-
is_math: false,
|
|
349
|
-
replacement: encode_iso_10303_11(char),
|
|
350
|
-
replacement_type: "iso-10303-11",
|
|
351
|
-
}
|
|
352
|
-
end
|
|
353
|
-
|
|
354
|
-
def encode_iso_10303_11(char)
|
|
355
|
-
code_point = char.ord
|
|
356
|
-
|
|
357
|
-
# Format the encoded value with double quotes
|
|
358
|
-
if code_point < 0x10000
|
|
359
|
-
"\"#{sprintf('%08X', code_point)}\"" # e.g., "00000041" for 'A'
|
|
360
|
-
else
|
|
361
|
-
# For higher code points, use all four octets
|
|
362
|
-
group = (code_point >> 24) & 0xFF
|
|
363
|
-
plane = (code_point >> 16) & 0xFF
|
|
364
|
-
row = (code_point >> 8) & 0xFF
|
|
365
|
-
cell = code_point & 0xFF
|
|
366
|
-
|
|
367
|
-
"\"#{sprintf('%02X%02X%02X%02X', group, plane, row, cell)}\""
|
|
368
|
-
end
|
|
369
|
-
end
|
|
370
|
-
|
|
371
|
-
def build_unicode_to_asciimath_map
|
|
372
|
-
# Start with a pre-defined mapping of common math symbols
|
|
373
|
-
unicode_to_asciimath = {
|
|
374
|
-
# Greek letters
|
|
375
|
-
"α" => "alpha",
|
|
376
|
-
"β" => "beta",
|
|
377
|
-
"γ" => "gamma",
|
|
378
|
-
"Γ" => "Gamma",
|
|
379
|
-
"δ" => "delta",
|
|
380
|
-
"Δ" => "Delta",
|
|
381
|
-
"ε" => "epsilon",
|
|
382
|
-
"ζ" => "zeta",
|
|
383
|
-
"η" => "eta",
|
|
384
|
-
"θ" => "theta",
|
|
385
|
-
"Θ" => "Theta",
|
|
386
|
-
"ι" => "iota",
|
|
387
|
-
"κ" => "kappa",
|
|
388
|
-
"λ" => "lambda",
|
|
389
|
-
"Λ" => "Lambda",
|
|
390
|
-
"μ" => "mu",
|
|
391
|
-
"ν" => "nu",
|
|
392
|
-
"ξ" => "xi",
|
|
393
|
-
"Ξ" => "Xi",
|
|
394
|
-
"π" => "pi",
|
|
395
|
-
"Π" => "Pi",
|
|
396
|
-
"ρ" => "rho",
|
|
397
|
-
"σ" => "sigma",
|
|
398
|
-
"Σ" => "Sigma",
|
|
399
|
-
"τ" => "tau",
|
|
400
|
-
"υ" => "upsilon",
|
|
401
|
-
"φ" => "phi",
|
|
402
|
-
"Φ" => "Phi",
|
|
403
|
-
"χ" => "chi",
|
|
404
|
-
"ψ" => "psi",
|
|
405
|
-
"Ψ" => "Psi",
|
|
406
|
-
"ω" => "omega",
|
|
407
|
-
"Ω" => "Omega",
|
|
408
|
-
|
|
409
|
-
# Math operators
|
|
410
|
-
"×" => "xx",
|
|
411
|
-
"÷" => "div",
|
|
412
|
-
"±" => "pm",
|
|
413
|
-
"∓" => "mp",
|
|
414
|
-
"∞" => "oo",
|
|
415
|
-
"≤" => "le",
|
|
416
|
-
"≥" => "ge",
|
|
417
|
-
"≠" => "ne",
|
|
418
|
-
"≈" => "~~",
|
|
419
|
-
"≅" => "cong",
|
|
420
|
-
"≡" => "equiv",
|
|
421
|
-
"∈" => "in",
|
|
422
|
-
"∉" => "notin",
|
|
423
|
-
"⊂" => "subset",
|
|
424
|
-
"⊃" => "supset",
|
|
425
|
-
"∩" => "cap",
|
|
426
|
-
"∪" => "cup",
|
|
427
|
-
"∧" => "and",
|
|
428
|
-
"∨" => "or",
|
|
429
|
-
"¬" => "neg",
|
|
430
|
-
"∀" => "forall",
|
|
431
|
-
"∃" => "exists",
|
|
432
|
-
"∄" => "nexists",
|
|
433
|
-
"∇" => "grad",
|
|
434
|
-
"∂" => "del",
|
|
435
|
-
"∑" => "sum",
|
|
436
|
-
"∏" => "prod",
|
|
437
|
-
"∫" => "int",
|
|
438
|
-
"∮" => "oint",
|
|
439
|
-
"√" => "sqrt",
|
|
440
|
-
"⊥" => "perp",
|
|
441
|
-
"‖" => "norm",
|
|
442
|
-
"→" => "rarr",
|
|
443
|
-
"←" => "larr",
|
|
444
|
-
"↔" => "harr",
|
|
445
|
-
"⇒" => "rArr",
|
|
446
|
-
"⇐" => "lArr",
|
|
447
|
-
"⇔" => "hArr",
|
|
448
|
-
}
|
|
449
|
-
|
|
450
|
-
# Augment with symbols from Plurimath
|
|
451
|
-
begin
|
|
452
|
-
# Get all symbols supported by AsciiMath
|
|
453
|
-
Plurimath::Utility.symbols_files.each do |symbol_class|
|
|
454
|
-
symbol = symbol_class.new
|
|
455
|
-
|
|
456
|
-
# Get the Unicode and AsciiMath representations
|
|
457
|
-
unicodes = symbol.to_unicodemath
|
|
458
|
-
asciimaths = symbol.to_asciimath
|
|
459
|
-
|
|
460
|
-
# Skip if either representation is missing
|
|
461
|
-
next unless unicodes.is_a?(Array) && asciimaths.is_a?(Array)
|
|
462
|
-
# Skip if empty arrays
|
|
463
|
-
next if unicodes.empty? || asciimaths.empty?
|
|
464
|
-
|
|
465
|
-
unicodes.each_with_index do |unicode, index|
|
|
466
|
-
# Skip if we're beyond available AsciiMath representations
|
|
467
|
-
next if index >= asciimaths.length
|
|
468
|
-
# Skip empty string values
|
|
469
|
-
next if unicode.to_s.empty?
|
|
470
|
-
|
|
471
|
-
# Map each character to its AsciiMath equivalent
|
|
472
|
-
unicode.to_s.chars.each do |char|
|
|
473
|
-
# Only add if not already in our mapping
|
|
474
|
-
unicode_to_asciimath[char] ||= asciimaths[index]
|
|
475
|
-
end
|
|
476
|
-
end
|
|
477
|
-
rescue StandardError => e
|
|
478
|
-
# Skip this symbol class if there's an error
|
|
479
|
-
puts "Warning: Error processing symbol class #{symbol_class}: #{e.message}" if $DEBUG
|
|
480
|
-
end
|
|
481
|
-
rescue StandardError => e
|
|
482
|
-
# Continue even if Plurimath integration fails
|
|
483
|
-
puts "Warning: Error loading Plurimath symbols: #{e.message}" if $DEBUG
|
|
484
|
-
end
|
|
485
|
-
|
|
486
|
-
unicode_to_asciimath
|
|
487
|
-
end
|
|
488
|
-
end
|
|
489
|
-
|
|
490
|
-
# ValidateAscii command for checking EXPRESS files for non-ASCII characters
|
|
491
|
-
class ValidateAscii < Thor
|
|
492
|
-
desc "validate-ascii EXPRESS_FILE_PATH",
|
|
493
|
-
"Validate EXPRESS files for ASCII-only content"
|
|
494
|
-
option :recursive, type: :boolean, default: false, aliases: "-r",
|
|
495
|
-
desc: "Validate EXPRESS files under the specified " \
|
|
496
|
-
"path recursively"
|
|
497
|
-
option :yaml, type: :boolean, default: false, aliases: "-y",
|
|
498
|
-
desc: "Output results in YAML format"
|
|
499
|
-
|
|
500
|
-
def validate_ascii(express_file_path) # rubocop:disable Metrics/AbcSize
|
|
501
|
-
if File.file?(express_file_path)
|
|
502
|
-
unless File.exist?(express_file_path)
|
|
503
|
-
raise Errno::ENOENT, "Specified EXPRESS file " \
|
|
504
|
-
"`#{express_file_path}` not found."
|
|
505
|
-
end
|
|
506
|
-
|
|
507
|
-
if File.extname(express_file_path) != ".exp"
|
|
508
|
-
raise ArgumentError, "Specified file `#{express_file_path}` is " \
|
|
509
|
-
"not an EXPRESS file."
|
|
510
|
-
end
|
|
511
|
-
|
|
512
|
-
exp_files = [express_file_path]
|
|
513
|
-
elsif options[:recursive]
|
|
514
|
-
# Support the relative path with glob pattern
|
|
515
|
-
base_path = File.expand_path(express_file_path)
|
|
516
|
-
exp_files = Dir.glob("#{base_path}/**/*.exp")
|
|
517
|
-
else
|
|
518
|
-
# Non-recursive option
|
|
519
|
-
base_path = File.expand_path(express_file_path)
|
|
520
|
-
exp_files = Dir.glob("#{base_path}/*.exp")
|
|
521
|
-
end
|
|
522
|
-
|
|
523
|
-
if exp_files.empty?
|
|
524
|
-
raise Errno::ENOENT, "No EXPRESS files found in " \
|
|
525
|
-
"`#{express_file_path}`."
|
|
526
|
-
end
|
|
527
|
-
|
|
528
|
-
run(exp_files)
|
|
529
|
-
end
|
|
530
|
-
|
|
531
|
-
private
|
|
532
|
-
|
|
533
|
-
def run(exp_files)
|
|
534
|
-
# Process all files and collect violations
|
|
535
|
-
collection = NonAsciiViolationCollection.new
|
|
536
|
-
|
|
537
|
-
exp_files.each do |exp_file|
|
|
538
|
-
collection.process_file(exp_file)
|
|
539
|
-
end
|
|
540
|
-
|
|
541
|
-
# Output results based on format
|
|
542
|
-
if options[:yaml]
|
|
543
|
-
puts collection.to_yaml_data.to_yaml
|
|
544
|
-
else
|
|
545
|
-
collection.print_text_output
|
|
546
|
-
collection.print_table_output if collection.files_with_violations.positive?
|
|
547
|
-
end
|
|
548
|
-
end
|
|
549
|
-
end
|
|
550
|
-
end
|
|
551
|
-
end
|