suma 0.1.11 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 91d8fb8b814f47412c1374e7c12c9d3fb5368a5275c409355c6cc02a728305b6
4
- data.tar.gz: 3cdf5da9b7c251df88f6434222e3a575e6f1cf3839f83ab7cb47091180807510
3
+ metadata.gz: 2f4f11e324818926bf5846c777a2ae2e1e317f5d174f9fd6f41b0d8d7f955816
4
+ data.tar.gz: 78fafa5a5247461c7e9907594aaa4cd9b6ce3644966423ec9ded7917f7be2f1c
5
5
  SHA512:
6
- metadata.gz: 55422c9b98ac260feb77bc8937aff2d45196ba369850a5166b82fec88e7318007c2a6e3d345a441b0ea7e6e3c6df3854432a56ab33012079ababa0fae8a0f6ab
7
- data.tar.gz: 50e1425e0bb51cff0866dbb4f7de66c03fc72772660aa253eafa5f25ebcd072e6d7f4c4b5006ad84c78fbd9364eb8cf5494498fc1c6747efe9e2b50a9de8757a
6
+ metadata.gz: 718b82f083343c86f5dd9e3b7fcf82fab4b918a6911bd29c8b080b24cb6ca7e40e4f13f545ce16728789dd4e7f06d75dca6e1fb702ecde2ed2f3840951b1c356
7
+ data.tar.gz: 74b9d53b69b1b3008f4eeae03e1811fed2d35fbdbe4108c9f9e368fde96c98d84c8ce3a98b682bdf8669cdd136e4d8c43ad9855537f709f829acfe0f1ed6bcee
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2025-03-23 10:59:23 UTC using RuboCop version 1.74.0.
3
+ # on 2025-04-03 06:48:36 UTC using RuboCop version 1.75.1.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -13,7 +13,17 @@ Gemspec/DuplicatedAssignment:
13
13
  Exclude:
14
14
  - 'suma.gemspec'
15
15
 
16
- # Offense count: 43
16
+ # Offense count: 2
17
+ # This cop supports safe autocorrection (--autocorrect).
18
+ # Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
19
+ # SupportedHashRocketStyles: key, separator, table
20
+ # SupportedColonStyles: key, separator, table
21
+ # SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
22
+ Layout/HashAlignment:
23
+ Exclude:
24
+ - 'lib/suma/cli/validate.rb'
25
+
26
+ # Offense count: 55
17
27
  # This cop supports safe autocorrection (--autocorrect).
18
28
  # Configuration parameters: Max, AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, SplitStrings.
19
29
  # URISchemes: http, https
@@ -21,33 +31,38 @@ Layout/LineLength:
21
31
  Exclude:
22
32
  - 'lib/suma/cli.rb'
23
33
  - 'lib/suma/cli/build.rb'
24
- - 'lib/suma/cli/links.rb'
34
+ - 'lib/suma/cli/validate.rb'
35
+ - 'lib/suma/cli/validate_ascii.rb'
36
+ - 'lib/suma/cli/validate_links.rb'
25
37
  - 'lib/suma/collection_manifest.rb'
26
38
  - 'lib/suma/processor.rb'
27
39
  - 'lib/suma/schema_attachment.rb'
28
40
  - 'lib/suma/schema_collection.rb'
29
41
  - 'lib/suma/schema_document.rb'
30
42
  - 'lib/suma/thor_ext.rb'
43
+ - 'spec/suma/cli/validate_ascii_spec.rb'
31
44
  - 'suma.gemspec'
32
45
 
33
- # Offense count: 1
34
- Lint/DuplicateMethods:
46
+ # Offense count: 3
47
+ # This cop supports safe autocorrection (--autocorrect).
48
+ # Configuration parameters: AllowInHeredoc.
49
+ Layout/TrailingWhitespace:
35
50
  Exclude:
36
- - 'lib/suma/express_schema.rb'
51
+ - 'lib/suma/cli.rb'
52
+ - 'lib/suma/cli/validate.rb'
37
53
 
38
54
  # Offense count: 2
39
- # This cop supports safe autocorrection (--autocorrect).
40
- # Configuration parameters: AutoCorrect, AllowUnusedKeywordArguments, IgnoreEmptyMethods, IgnoreNotImplementedMethods, NotImplementedExceptions.
41
- # NotImplementedExceptions: NotImplementedError
42
- Lint/UnusedMethodArgument:
55
+ Lint/DuplicateMethods:
43
56
  Exclude:
44
- - 'lib/suma/cli.rb'
57
+ - 'lib/suma/cli/validate_ascii.rb'
58
+ - 'lib/suma/express_schema.rb'
45
59
 
46
- # Offense count: 11
60
+ # Offense count: 15
47
61
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
48
62
  Metrics/AbcSize:
49
63
  Exclude:
50
- - 'lib/suma/cli/links.rb'
64
+ - 'lib/suma/cli/validate_ascii.rb'
65
+ - 'lib/suma/cli/validate_links.rb'
51
66
  - 'lib/suma/schema_attachment.rb'
52
67
  - 'lib/suma/schema_document.rb'
53
68
  - 'lib/suma/thor_ext.rb'
@@ -58,28 +73,39 @@ Metrics/AbcSize:
58
73
  Metrics/BlockLength:
59
74
  Max: 64
60
75
 
61
- # Offense count: 2
76
+ # Offense count: 4
62
77
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
63
78
  Metrics/CyclomaticComplexity:
64
79
  Exclude:
65
- - 'lib/suma/cli/links.rb'
80
+ - 'lib/suma/cli/validate_ascii.rb'
81
+ - 'lib/suma/cli/validate_links.rb'
66
82
  - 'lib/suma/thor_ext.rb'
67
83
 
68
- # Offense count: 9
84
+ # Offense count: 21
69
85
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
70
86
  Metrics/MethodLength:
71
- Max: 72
87
+ Max: 107
72
88
 
73
- # Offense count: 3
89
+ # Offense count: 4
74
90
  # Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
75
91
  Metrics/ParameterLists:
76
92
  Max: 6
77
93
 
78
- # Offense count: 1
94
+ # Offense count: 2
79
95
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
80
96
  Metrics/PerceivedComplexity:
81
97
  Exclude:
82
- - 'lib/suma/cli/links.rb'
98
+ - 'lib/suma/cli/validate_ascii.rb'
99
+ - 'lib/suma/cli/validate_links.rb'
100
+
101
+ # Offense count: 5
102
+ # Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
103
+ # SupportedStyles: snake_case, normalcase, non_integer
104
+ # AllowedIdentifiers: TLS1_1, TLS1_2, capture3, iso8601, rfc1123_date, rfc822, rfc2822, rfc3339, x86_64
105
+ Naming/VariableNumber:
106
+ Exclude:
107
+ - 'lib/suma/cli/validate_ascii.rb'
108
+ - 'spec/suma/cli/validate_ascii_spec.rb'
83
109
 
84
110
  # Offense count: 1
85
111
  # Configuration parameters: MinSize.
@@ -87,9 +113,14 @@ Performance/CollectionLiteralInLoop:
87
113
  Exclude:
88
114
  - 'spec/suma/cli_spec.rb'
89
115
 
90
- # Offense count: 1
116
+ # Offense count: 3
117
+ # Configuration parameters: CountAsOne.
118
+ RSpec/ExampleLength:
119
+ Max: 16
120
+
121
+ # Offense count: 5
91
122
  RSpec/MultipleExpectations:
92
- Max: 2
123
+ Max: 12
93
124
 
94
125
  # Offense count: 1
95
126
  # This cop supports safe autocorrection (--autocorrect).
@@ -97,4 +128,11 @@ RSpec/MultipleExpectations:
97
128
  # SupportedStyles: empty, nil, both
98
129
  Style/EmptyElse:
99
130
  Exclude:
100
- - 'lib/suma/cli/links.rb'
131
+ - 'lib/suma/cli/validate_links.rb'
132
+
133
+ # Offense count: 4
134
+ # This cop supports safe autocorrection (--autocorrect).
135
+ # Configuration parameters: MaxUnannotatedPlaceholdersAllowed, Mode, AllowedMethods, AllowedPatterns.
136
+ # SupportedStyles: annotated, template, unannotated
137
+ Style/FormatStringToken:
138
+ EnforcedStyle: unannotated
data/README.adoc CHANGED
@@ -37,9 +37,10 @@ $ gem install suma
37
37
  # Defaults to `suma help`
38
38
  $ suma
39
39
  Commands:
40
- suma build METANORMA_SITE_MANIFEST # Build collection specified in site manifest (`metanorma*.yml`)
41
- suma links SUBCOMMAND ...ARGS # Manage EXPRESS links
42
- suma help [COMMAND] # Describe available commands or one specific command
40
+ suma build METANORMA_SITE_MANIFEST # Build collection specified in site manifest (`metanorma*.yml`)
41
+ suma reformat EXPRESS_FILE_PATH # Reformat EXPRESS files
42
+ suma validate SUBCOMMAND ...ARGS # Validate express documents
43
+ suma help [COMMAND] # Describe available commands or one specific command
43
44
  ----
44
45
 
45
46
  === Build command
@@ -89,19 +90,61 @@ $ bundle exec suma build --no-compile metanorma-srl.yml
89
90
  All documents need to have a `schemas.yaml` in their document root that lists
90
91
  out which schemas the document includes.
91
92
 
92
- === Links command
93
+ === Reformat command
94
+
95
+ The `reformat` command provides utilities for reformatting EXPRESS files.
96
+
97
+ [source,sh]
98
+ ----
99
+ $ suma reformat EXPRESS_FILE_PATH [options]
100
+ ----
101
+
102
+ Parameters:
103
+
104
+ `EXPRESS_FILE_PATH`:: Path to an EXPRESS file or a folder containing EXPRESS
105
+ files
106
+
107
+ Options:
108
+
109
+ `--[no-]recursive`, `-r`:: Select EXPRESS files recursively based on the specified
110
+ folder path (default: false)
111
+
112
+ [example]
113
+ ====
114
+ .To reformat all EXPRESS files under the current directory recursively
115
+ [source,sh]
116
+ ----
117
+ $ bundle exec suma reformat `pwd` -r
118
+ ----
119
+ ====
120
+
121
+ This command:
122
+
123
+ * Loads the EXPRESS files specified in the `EXPRESS_FILE_PATH`
124
+ * Reformats and saves the loaded EXPRESS files
125
+
126
+ === Validate command
93
127
 
94
128
  ==== General
95
129
 
96
- The `links` command provides utilities for managing EXPRESS links.
130
+ The `validate` command groups various validation utilities for EXPRESS documents.
131
+
132
+ [source,sh]
133
+ ----
134
+ $ suma validate SUBCOMMAND [options]
135
+ ----
136
+
137
+ Subcommands:
138
+ - `links` - Validate EXPRESS links
139
+ - `ascii` - Check for non-ASCII characters in EXPRESS files
97
140
 
98
- ==== Extract and validate
141
+ ==== Links subcommand
99
142
 
100
- Extracts and validates EXPRESS links without creating intermediate files.
143
+ The `links` subcommand extracts and validates EXPRESS links without creating intermediate files.
101
144
 
102
145
  [source,sh]
103
146
  ----
104
- $ suma links extract_and_validate SCHEMAS_FILE DOCUMENTS_PATH [OUTPUT_FILE]
147
+ $ suma validate links SCHEMAS_FILE DOCUMENTS_PATH [OUTPUT_FILE]
105
148
  ----
106
149
 
107
150
  Parameters:
@@ -117,7 +160,7 @@ Parameters:
117
160
  .To validate EXPRESS links in documents
118
161
  [source,sh]
119
162
  ----
120
- $ bundle exec suma links extract_and_validate schemas-srl.yml documents validation_results.txt
163
+ $ bundle exec suma validate links schemas-srl.yml documents validation_results.txt
121
164
  ----
122
165
  ====
123
166
 
@@ -129,16 +172,13 @@ This command:
129
172
  * Writes validation results to the `OUTPUT_FILE`
130
173
  * Provides progress bars to track schema loading and link validation
131
174
 
175
+ ==== ASCII subcommand
132
176
 
133
- === Reformat command
134
-
135
- ==== General
136
-
137
- The `reformat` command provides utilities for reformatting EXPRESS files.
177
+ The `ascii` subcommand detects non-ASCII characters in EXPRESS files and reports on those exact lines, providing replacement suggestions.
138
178
 
139
179
  [source,sh]
140
180
  ----
141
- $ suma reformat EXPRESS_FILE_PATH [options]
181
+ $ suma validate ascii EXPRESS_FILE_PATH [options]
142
182
  ----
143
183
 
144
184
  Parameters:
@@ -148,22 +188,103 @@ files
148
188
 
149
189
  Options:
150
190
 
151
- `--[no-]recursive`:: Select EXPRESS files recursively based on the specified
191
+ `--[no-]recursive`, `-r`:: Select EXPRESS files recursively based on the specified
152
192
  folder path (default: false)
193
+ `--[no-]yaml`, `-y`:: Output results in YAML format for machine processing (default: false)
153
194
 
154
195
  [example]
155
196
  ====
156
- .To reformat all EXPRESS files under the current directory recursively
197
+ .To validate all EXPRESS files in a specific directory recursively
157
198
  [source,sh]
158
199
  ----
159
- $ bundle exec suma reformat `pwd` -r
200
+ $ bundle exec suma validate ascii ../iso-10303/schemas -r
201
+ ----
202
+
203
+ .To validate and output results in YAML format
204
+ [source,sh]
205
+ ----
206
+ $ bundle exec suma validate ascii ../iso-10303/schemas -r -y > validation.yml
160
207
  ----
161
208
  ====
162
209
 
163
210
  This command:
164
211
 
165
212
  * Loads the EXPRESS files specified in the `EXPRESS_FILE_PATH`
166
- * Reformats and saves the loaded EXPRESS files
213
+ * Scans each line for non-ASCII characters
214
+ * Reports detailed information about each violation, including:
215
+ ** Filename and line number
216
+ ** The exact line content
217
+ ** Visual indication of the non-ASCII sequence location
218
+ ** Character details with hexadecimal representation
219
+ * Provides specific replacement suggestions:
220
+ ** For math symbols: provides equivalent AsciiMath notation
221
+ ** For other non-ASCII characters: provides ISO 10303-11 encoded string literal format
222
+ * Displays a summary table showing:
223
+ ** File path (directory/filename)
224
+ ** Each non-ASCII symbol found
225
+ ** Suggested replacement for each symbol
226
+ ** Number of occurrences of each character
227
+ ** Totals row showing unique character count and overall occurrences
228
+ * Summarizes findings across all scanned files
229
+ * Optionally outputs structured data in YAML format with detailed occurrence information
230
+
231
+ Human-readable output format example:
232
+
233
+ [source,text]
234
+ ----
235
+ /path/to/file.exp:
236
+ Line 42, Column 15:
237
+ ENTITY some_entity (name: STRING, description: "résumé");
238
+ ^^^^^
239
+ "é" - Hex: 0xe9, UTF-8 bytes: 0xc3 0xa9
240
+ Replacement: ISO 10303-11: "000000E9"
241
+
242
+ "s" - Hex: 0x73, UTF-8 bytes: 0x73
243
+
244
+ "u" - Hex: 0x75, UTF-8 bytes: 0x75
245
+
246
+ "m" - Hex: 0x6d, UTF-8 bytes: 0x6d
247
+
248
+ "é" - Hex: 0xe9, UTF-8 bytes: 0xc3 0xa9
249
+ Replacement: ISO 10303-11: "000000E9"
250
+
251
+ Found 1 non-ASCII sequence(s) in file.exp
252
+
253
+ Summary:
254
+ Scanned 3 EXPRESS file(s)
255
+ Found 1 non-ASCII sequence(s) in 1 file(s)
256
+
257
+ +------------------+--------------------+-----------------------------+-------------+
258
+ | File | Symbol | Replacement | Occurrences |
259
+ +------------------+--------------------+-----------------------------+-------------+
260
+ | path/to/file.exp | "é" (0xe9) | ISO 10303-11: "000000E9" | 2 |
261
+ +------------------+--------------------+-----------------------------+-------------+
262
+ | TOTAL | 1 unique | | 2 |
263
+ +------------------+--------------------+-----------------------------+-------------+
264
+ ----
265
+
266
+ ===== Japanese Character Example
267
+
268
+ For Japanese characters like 神戸 (Kobe), the command will provide ISO 10303-11 encoded string literal replacements:
269
+
270
+ [source,text]
271
+ ----
272
+ "神" - Hex: 0x795e, UTF-8 bytes: 0xe7 0xa5 0x9e
273
+ Replacement: ISO 10303-11: "0000795E"
274
+
275
+ "戸" - Hex: 0x6238, UTF-8 bytes: 0xe6 0x88 0xb8
276
+ Replacement: ISO 10303-11: "00006238"
277
+ ----
278
+
279
+ ===== Math Symbol Example
280
+
281
+ For mathematical symbols, the command will provide equivalent AsciiMath notation:
282
+
283
+ [source,text]
284
+ ----
285
+ "×" - Hex: 0xd7, UTF-8 bytes: 0xc3 0x97
286
+ Replacement: AsciiMath: xx
287
+ ----
167
288
 
168
289
 
169
290
  == Usage: Ruby
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "thor"
4
+
5
+ module Suma
6
+ module Cli
7
+ # Main validate command that groups the validation subcommands
8
# Thor command group behind `suma validate`.
#
# Each subcommand loads its implementation file only when it is invoked and
# then hands the work to the matching command class.
class Validate < Thor
  # `suma validate links`: forwards all positional arguments unchanged to
  # ValidateLinks#extract_and_validate.
  desc "links SCHEMAS_FILE DOCUMENTS_PATH [OUTPUT_FILE]",
       "Extract and validate express links without creating intermediate file"
  def links(*args)
    require_relative "validate_links"

    Cli::ValidateLinks.new.extract_and_validate(*args)
  end

  # `suma validate ascii`: runs the non-ASCII scan, passing along the options
  # parsed for this invocation (`recursive`, `yaml`).
  desc "ascii EXPRESS_FILE_PATH",
       "Validate EXPRESS files for ASCII-only content"
  option :recursive, type: :boolean, default: false, aliases: "-r",
                     desc: "Validate EXPRESS files under the specified path recursively"
  option :yaml, type: :boolean, default: false, aliases: "-y",
                desc: "Output results in YAML format"
  def ascii(express_file_path)
    require_relative "validate_ascii"

    checker = Cli::ValidateAscii.new.tap do |command|
      command.options = options
    end
    checker.validate_ascii(express_file_path)
  end
end
33
+ end
34
+ end
@@ -0,0 +1,519 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "thor"
4
+ require "yaml"
5
+ require "terminal-table"
6
+ require "plurimath"
7
+ require "set" # For using Set in unique_character_count
8
+ require_relative "../thor_ext"
9
+
10
+ module Suma
11
+ module Cli
12
+ # Represents a non-ASCII character with its details and replacement
13
# One distinct non-ASCII character together with its encodings, the suggested
# replacement, and every place it was seen.
class NonAsciiCharacter
  attr_reader :char, :hex, :utf8, :is_math, :replacement,
              :replacement_type, :occurrences

  # char             - the character itself
  # hex              - code point rendered as a hex string
  # utf8             - UTF-8 bytes rendered as a string
  # is_math          - truthy when the replacement is AsciiMath notation
  # replacement      - suggested replacement text
  # replacement_type - identifier of the replacement scheme
  def initialize(char, hex, utf8, is_math, replacement, replacement_type)
    @occurrences = []
    @char = char
    @hex = hex
    @utf8 = utf8
    @is_math = is_math
    @replacement = replacement
    @replacement_type = replacement_type
  end

  # Records one more sighting of the character; returns the occurrence list.
  def add_occurrence(line_number, column, line)
    occurrences.push(line_number: line_number, column: column, line: line)
  end

  # Replacement prefixed with the name of the scheme it belongs to.
  def replacement_text
    scheme = is_math ? "AsciiMath" : "ISO 10303-11"
    "#{scheme}: #{replacement}"
  end

  # Number of recorded sightings.
  def occurrence_count
    occurrences.length
  end

  # Plain-hash form (used for the YAML report); key order is part of the
  # output, so it is kept stable.
  def to_h
    {
      character: char,
      hex: hex,
      utf8: utf8,
      is_math: is_math,
      replacement_type: replacement_type,
      replacement: replacement,
      occurrence_count: occurrence_count,
      occurrences: occurrences,
    }
  end
end
56
+
57
+ # Represents all non-ASCII characters in a file
58
# All non-ASCII findings for a single file: the raw violations in the order
# they were added, plus one NonAsciiCharacter per distinct character.
class FileViolations
  # NOTE: `unique_characters` is deliberately NOT listed here. It is defined
  # explicitly below; also declaring a reader would define the method twice
  # (and the reader would expose an instance variable that is never set).
  attr_reader :path, :filename, :directory, :violations

  # file_path - path of the scanned file, kept exactly as given.
  def initialize(file_path)
    @path = file_path
    @filename = File.basename(file_path)
    @directory = File.dirname(file_path)
    @characters = {} # Map of characters to NonAsciiCharacter objects
    @violations = [] # List of violations (line, column, etc.)
  end

  # Records one non-ASCII sequence and registers each of its characters.
  #
  # line_number  - line number of the sequence
  # column       - column of the sequence within the line
  # match        - the matched sequence
  # char_details - array of hashes with :char, :hex, :utf8, :is_math,
  #                :replacement and :replacement_type
  # line         - content of the line
  #
  # Returns char_details.
  def add_violation(line_number, column, match, char_details, line)
    @violations << {
      line_number: line_number,
      column: column,
      match: match,
      char_details: char_details,
      line: line,
    }

    char_details.each do |detail|
      char = detail[:char]
      # First sighting creates the record; later sightings only add an
      # occurrence to it.
      @characters[char] ||= NonAsciiCharacter.new(
        char,
        detail[:hex],
        detail[:utf8],
        detail[:is_math],
        detail[:replacement],
        detail[:replacement_type],
      )

      @characters[char].add_occurrence(line_number, column, line)
    end
  end

  # Number of recorded non-ASCII sequences.
  def violation_count
    @violations.size
  end

  # Distinct characters seen so far, in first-seen order.
  def unique_characters
    @characters.values
  end

  # Short "<parent directory>/<file name>" label used in reports.
  def display_path
    "#{File.basename(@directory)}/#{@filename}"
  end

  # Absolute form of the path given to the constructor.
  def full_path
    File.expand_path(@path)
  end

  # Plain-hash form used for the YAML report.
  def to_h
    {
      file: display_path,
      count: violation_count,
      non_ascii_characters: unique_characters.map(&:to_h),
    }
  end
end
122
+
123
+ # Collection of all violations across multiple files
124
+ class NonAsciiViolationCollection
125
+ attr_reader :file_violations, :total_files
126
+
127
# Starts an empty collection: nothing scanned yet and no findings recorded.
def initialize
  @total_files = 0
  @file_violations = {} # Map of file paths to FileViolations objects
  @unicode_to_asciimath = nil # assigned on the first process_file call
end
132
+
133
# Scans one file and keeps its findings when there are any.
#
# Every call counts towards the scanned-file total, whether or not the file
# has violations. Returns the stored FileViolations, or nil for a clean file.
def process_file(file)
  @total_files += 1

  # Build the symbol table only once, on first use.
  @unicode_to_asciimath ||= build_unicode_to_asciimath_map

  scanned = process_file_violations(file)
  @file_violations[file] = scanned unless scanned.violations.empty?
end
144
+
145
# Number of scanned files in which at least one violation was recorded.
def files_with_violations
  @file_violations.length
end
148
+
149
# Total number of non-ASCII sequences across all files with findings.
def total_violations
  @file_violations.each_value.sum { |per_file| per_file.violation_count }
end
152
+
153
# Number of distinct characters across all files; a character that appears
# in several files is counted once.
def unique_character_count
  @file_violations.each_value
    .flat_map { |per_file| per_file.unique_characters.map(&:char) }
    .uniq
    .size
end
163
+
164
# Total number of individual character occurrences across all files.
def total_occurrence_count
  @file_violations.each_value.sum do |per_file|
    per_file.unique_characters.sum { |character| character.occurrence_count }
  end
end
170
+
171
# Builds the structure serialised for the YAML report: a summary section
# followed by the per-file findings keyed by absolute file path.
def to_yaml_data
  per_file = @file_violations.each_with_object({}) do |(path, findings), out|
    out[File.expand_path(path)] = findings.to_h
  end

  summary = {
    total_files: @total_files,
    files_with_violations: files_with_violations,
    total_violations: total_violations,
    total_unique_characters: unique_character_count,
    total_occurrences: total_occurrence_count,
  }

  { summary: summary, violations: per_file }
end
185
+
186
# Prints the human-readable report to stdout: every violation of every file
# (line, caret marker, per-character details), a per-file count, and a final
# summary. Prints nothing when no violations were recorded.
def print_text_output
  return if @file_violations.empty?

  @file_violations.each_value do |per_file|
    puts "\n#{per_file.display_path}:"

    per_file.violations.each do |hit|
      puts " Line #{hit[:line_number]}, Column #{hit[:column]}:"
      puts " #{hit[:line]}"
      # Caret marker: padded by the column, one caret per matched character.
      puts " #{' ' * hit[:column]}#{'^' * hit[:match].length} Non-ASCII sequence"

      hit[:char_details].each do |detail|
        # Look up the registered character to obtain its replacement text.
        character = per_file.unique_characters.find do |candidate|
          candidate.char == detail[:char]
        end
        next if character.nil?

        puts " \"#{detail[:char]}\" - Hex: #{detail[:hex]}, UTF-8 bytes: #{detail[:utf8]}"
        puts " Replacement: #{character.replacement_text}"
      end
      puts ""
    end

    puts " Found #{per_file.violation_count} non-ASCII sequence(s) in #{per_file.filename}\n"
  end

  puts "\nSummary:"
  puts " Scanned #{@total_files} EXPRESS file(s)"
  puts " Found #{total_violations} non-ASCII sequence(s) in #{files_with_violations} file(s)"
end
218
+
219
# Prints a summary table to stdout with one row per (file, character) pair
# and a closing TOTAL row. Prints nothing when no violations were recorded.
def print_table_output
  return if @file_violations.empty?

  summary = ::Terminal::Table.new(
    title: "Non-ASCII Characters Summary",
    headings: ["File", "Symbol", "Replacement", "Occurrences"],
  )

  grand_total = 0

  @file_violations.each_value do |per_file|
    per_file.unique_characters.each do |character|
      seen = character.occurrence_count
      grand_total += seen

      summary.add_row [
        per_file.display_path,
        "\"#{character.char}\" (#{character.hex})",
        character.replacement_text,
        seen,
      ]
    end
  end

  # Closing row, set apart from the data rows by a separator.
  summary.add_separator
  summary.add_row ["TOTAL", "#{unique_character_count} unique", "", grand_total]

  puts "\n#{summary}\n"
end
254
+
255
+ private
256
+
257
# Scans one file line by line and collects every run of consecutive
# non-ASCII characters.
#
# file - path of the file to scan; it is read as UTF-8.
#
# Returns a FileViolations holding one violation per non-ASCII run, each with
# its 1-based line number and 0-based character column.
def process_file_violations(file)
  file_violations = FileViolations.new(file)

  File.foreach(file, encoding: "UTF-8").with_index(1) do |raw_line, line_number|
    line = raw_line.chomp

    line.scan(/[^\x00-\x7F]+/) do |sequence|
      # Take the offset of THIS match. Searching the line for the matched
      # text (String#index) always finds the first occurrence, which reports
      # the wrong column when the same sequence appears more than once on a
      # line. Read it before any other regexp call can replace the match data.
      column = Regexp.last_match.begin(0)

      char_details = sequence.chars.filter_map do |c|
        process_non_ascii_char(c)
      end

      # Skip if no non-ASCII characters found
      next if char_details.empty?

      file_violations.add_violation(line_number, column, sequence,
                                    char_details, line)
    end
  end

  file_violations
end
288
+
289
# Describes a single character for reporting.
#
# Returns nil for ASCII characters. Otherwise returns a hash with the
# character, its code point in hex, its UTF-8 bytes, and a replacement:
# AsciiMath when the character is in the symbol table, the ISO 10303-11
# encoding when it is not.
def process_non_ascii_char(char)
  code_point = char.ord
  return nil if code_point <= 0x7F

  utf8_bytes = code_point.chr(Encoding::UTF_8).bytes.map do |byte|
    "0x#{byte.to_s(16)}"
  end

  details = {
    char: char,
    hex: "0x#{code_point.to_s(16)}",
    utf8: utf8_bytes.join(" "),
  }

  asciimath = @unicode_to_asciimath[char]
  if asciimath
    details.merge(
      is_math: true,
      replacement: asciimath,
      replacement_type: "asciimath",
    )
  else
    details.merge(
      is_math: false,
      replacement: encode_iso_10303_11(char),
      replacement_type: "iso-10303-11",
    )
  end
end
321
+
322
# Encodes a character as a double-quoted string of eight uppercase hex digits
# holding its code point, e.g. "000000E9" for U+00E9.
#
# The zero-padded 32-bit rendering already contains all four octets, so code
# points at or above U+10000 need no separate handling: splitting the value
# into four bytes and printing each as two hex digits yields the same text.
#
# char - a single-character string.
#
# Returns the encoded string including the surrounding double quotes.
def encode_iso_10303_11(char)
  format('"%08X"', char.ord)
end
338
+
339
# Builds the lookup table from a Unicode character to its AsciiMath notation.
#
# The table starts from a fixed list of common Greek letters and math
# operators and is then extended with symbols obtained from Plurimath.
# Entries from the fixed list always win; Plurimath only fills in characters
# that are not mapped yet. Any error while reading Plurimath symbols is
# tolerated, so the fixed list is always returned at minimum.
#
# Diagnostics (only emitted when $DEBUG is set) are written to stderr so that
# they never end up inside report output printed on stdout.
#
# Returns a Hash of String => String.
def build_unicode_to_asciimath_map
  # Start with a pre-defined mapping of common math symbols
  unicode_to_asciimath = {
    # Greek letters
    "α" => "alpha",
    "β" => "beta",
    "γ" => "gamma",
    "Γ" => "Gamma",
    "δ" => "delta",
    "Δ" => "Delta",
    "ε" => "epsilon",
    "ζ" => "zeta",
    "η" => "eta",
    "θ" => "theta",
    "Θ" => "Theta",
    "ι" => "iota",
    "κ" => "kappa",
    "λ" => "lambda",
    "Λ" => "Lambda",
    "μ" => "mu",
    "ν" => "nu",
    "ξ" => "xi",
    "Ξ" => "Xi",
    "π" => "pi",
    "Π" => "Pi",
    "ρ" => "rho",
    "σ" => "sigma",
    "Σ" => "Sigma",
    "τ" => "tau",
    "υ" => "upsilon",
    "φ" => "phi",
    "Φ" => "Phi",
    "χ" => "chi",
    "ψ" => "psi",
    "Ψ" => "Psi",
    "ω" => "omega",
    "Ω" => "Omega",

    # Math operators
    "×" => "xx",
    "÷" => "div",
    "±" => "pm",
    "∓" => "mp",
    "∞" => "oo",
    "≤" => "le",
    "≥" => "ge",
    "≠" => "ne",
    "≈" => "~~",
    "≅" => "cong",
    "≡" => "equiv",
    "∈" => "in",
    "∉" => "notin",
    "⊂" => "subset",
    "⊃" => "supset",
    "∩" => "cap",
    "∪" => "cup",
    "∧" => "and",
    "∨" => "or",
    "¬" => "neg",
    "∀" => "forall",
    "∃" => "exists",
    "∄" => "nexists",
    "∇" => "grad",
    "∂" => "del",
    "∑" => "sum",
    "∏" => "prod",
    "∫" => "int",
    "∮" => "oint",
    "√" => "sqrt",
    "⊥" => "perp",
    "‖" => "norm",
    "→" => "rarr",
    "←" => "larr",
    "↔" => "harr",
    "⇒" => "rArr",
    "⇐" => "lArr",
    "⇔" => "hArr",
  }

  # Augment with symbols from Plurimath
  begin
    Plurimath::Utility.symbols_files.each do |symbol_class|
      symbol = symbol_class.new

      # Get the Unicode and AsciiMath representations
      unicodes = symbol.to_unicodemath
      asciimaths = symbol.to_asciimath

      # Only symbols that expose both representations as non-empty arrays
      # can be paired up position by position.
      next unless unicodes.is_a?(Array) && asciimaths.is_a?(Array)
      next if unicodes.empty? || asciimaths.empty?

      unicodes.each_with_index do |unicode, index|
        # Skip if we're beyond available AsciiMath representations
        next if index >= asciimaths.length
        # Skip empty string values
        next if unicode.to_s.empty?

        # Map each character to its AsciiMath equivalent
        unicode.to_s.chars.each do |char|
          # Only add if not already in our mapping
          unicode_to_asciimath[char] ||= asciimaths[index]
        end
      end
    rescue StandardError => e
      # Skip this symbol class if there's an error
      $stderr.puts "Warning: Error processing symbol class #{symbol_class}: #{e.message}" if $DEBUG
    end
  rescue StandardError => e
    # Continue even if Plurimath integration fails
    $stderr.puts "Warning: Error loading Plurimath symbols: #{e.message}" if $DEBUG
  end

  unicode_to_asciimath
end
456
+ end
457
+
458
+ # ValidateAscii command for checking EXPRESS files for non-ASCII characters
459
# ValidateAscii command for checking EXPRESS files for non-ASCII characters
class ValidateAscii < Thor
  desc "validate-ascii EXPRESS_FILE_PATH",
       "Validate EXPRESS files for ASCII-only content"
  option :recursive, type: :boolean, default: false, aliases: "-r",
                     desc: "Validate EXPRESS files under the specified " \
                           "path recursively"
  option :yaml, type: :boolean, default: false, aliases: "-y",
                desc: "Output results in YAML format"

  # Resolves the EXPRESS files to check and runs the scan on them.
  #
  # express_file_path - a single `.exp` file, or a directory whose `.exp`
  #                     files are checked (including subdirectories when the
  #                     `recursive` option is set).
  #
  # Raises Errno::ENOENT when the path does not exist or no `.exp` file is
  # found under it, and ArgumentError when a single file without the `.exp`
  # extension is given.
  def validate_ascii(express_file_path) # rubocop:disable Metrics/AbcSize
    # This check has to run before the file/directory split: nested inside
    # the `File.file?` branch it could never fire, because `File.file?` is
    # already false for a path that does not exist.
    unless File.exist?(express_file_path)
      raise Errno::ENOENT, "Specified EXPRESS file " \
                           "`#{express_file_path}` not found."
    end

    if File.file?(express_file_path)
      if File.extname(express_file_path) != ".exp"
        raise ArgumentError, "Specified file `#{express_file_path}` is " \
                             "not an EXPRESS file."
      end

      exp_files = [express_file_path]
    elsif options[:recursive]
      # Every .exp file below the directory, at any depth
      base_path = File.expand_path(express_file_path)
      exp_files = Dir.glob("#{base_path}/**/*.exp")
    else
      # Only .exp files directly inside the directory
      base_path = File.expand_path(express_file_path)
      exp_files = Dir.glob("#{base_path}/*.exp")
    end

    if exp_files.empty?
      raise Errno::ENOENT, "No EXPRESS files found in " \
                           "`#{express_file_path}`."
    end

    run(exp_files)
  end

  private

  # Scans every file and prints the result: YAML when the `yaml` option is
  # set, otherwise the text report followed by the summary table.
  def run(exp_files)
    collection = NonAsciiViolationCollection.new

    exp_files.each do |exp_file|
      collection.process_file(exp_file)
    end

    if options[:yaml]
      puts collection.to_yaml_data.to_yaml
    else
      collection.print_text_output
      collection.print_table_output if collection.files_with_violations.positive?
    end
  end
end
518
+ end
519
+ end
@@ -5,8 +5,8 @@ require_relative "../utils"
5
5
 
6
6
  module Suma
7
7
  module Cli
8
- # Links command for managing EXPRESS links
9
- class Links < Thor
8
+ # ValidateLinks command for managing EXPRESS links
9
+ class ValidateLinks < Thor
10
10
  desc "extract_and_validate SCHEMAS_FILE DOCUMENTS_PATH [OUTPUT_FILE]",
11
11
  "Extract and validate express links without creating intermediate file"
12
12
  def extract_and_validate(schemas_file = "schemas-srl.yml",
data/lib/suma/cli.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "thor"
4
4
  require_relative "thor_ext"
5
+ require_relative "cli/validate"
5
6
 
6
7
  module Suma
7
8
  module Cli
@@ -22,12 +23,6 @@ module Suma
22
23
  Cli::Build.start
23
24
  end
24
25
 
25
- desc "links SUBCOMMAND ...ARGS", "Manage EXPRESS links"
26
- def links(*_args)
27
- require_relative "cli/links"
28
- Cli::Links.start
29
- end
30
-
31
26
  desc "reformat EXPRESS_FILE_PATH",
32
27
  "Reformat EXPRESS files"
33
28
  option :recursive, type: :boolean, default: false, aliases: "-r",
@@ -37,6 +32,13 @@ module Suma
37
32
  require_relative "cli/reformat"
38
33
  Cli::Reformat.start
39
34
  end
35
+
36
+ desc "validate SUBCOMMAND ...ARGS", "Validate express documents"
37
+ subcommand "validate", Cli::Validate
38
+
39
# Thor hook: answering true asks Thor to end the process with a failing exit
# status when a command errors (see Thor's documentation for the exact
# conditions).
def self.exit_on_failure?
  true
end
40
42
  end
41
43
  end
42
44
  end
data/lib/suma/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Suma
4
- VERSION = "0.1.11"
4
+ VERSION = "0.1.12"
5
5
  end
data/suma.gemspec CHANGED
@@ -36,7 +36,9 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
36
36
  spec.add_dependency "expressir", "~> 2.1"
37
37
  spec.add_dependency "lutaml-model", "~> 0.7"
38
38
  spec.add_dependency "metanorma-cli"
39
+ spec.add_dependency "plurimath"
39
40
  spec.add_dependency "ruby-progressbar"
41
+ spec.add_dependency "terminal-table", "~> 3.0"
40
42
  spec.add_dependency "thor", ">= 0.20"
41
43
  spec.metadata["rubygems_mfa_required"] = "true"
42
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: suma
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.11
4
+ version: 0.1.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-03-27 00:00:00.000000000 Z
11
+ date: 2025-04-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: expressir
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: plurimath
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: ruby-progressbar
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -66,6 +80,20 @@ dependencies:
66
80
  - - ">="
67
81
  - !ruby/object:Gem::Version
68
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: terminal-table
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '3.0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '3.0'
69
97
  - !ruby/object:Gem::Dependency
70
98
  name: thor
71
99
  requirement: !ruby/object:Gem::Requirement
@@ -106,8 +134,10 @@ files:
106
134
  - lib/suma.rb
107
135
  - lib/suma/cli.rb
108
136
  - lib/suma/cli/build.rb
109
- - lib/suma/cli/links.rb
110
137
  - lib/suma/cli/reformat.rb
138
+ - lib/suma/cli/validate.rb
139
+ - lib/suma/cli/validate_ascii.rb
140
+ - lib/suma/cli/validate_links.rb
111
141
  - lib/suma/collection_config.rb
112
142
  - lib/suma/collection_manifest.rb
113
143
  - lib/suma/express_schema.rb