suma 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9c08dd5c9b1e53371518104f17eaa1b0ae59590069e15239051f2619e3083086
4
- data.tar.gz: f4aa885d0259c98ee8f00eefdb21b2fd6d4c2b9ac2e64d6987ce55fe362b5bab
3
+ metadata.gz: 2f4f11e324818926bf5846c777a2ae2e1e317f5d174f9fd6f41b0d8d7f955816
4
+ data.tar.gz: 78fafa5a5247461c7e9907594aaa4cd9b6ce3644966423ec9ded7917f7be2f1c
5
5
  SHA512:
6
- metadata.gz: a0fcddad0380d5c5e0a714729c0fc59e8195f83f7af1c40fe29caf8fd124cc75fffdaceaeb973b5d4f6cedd0ab526695b7019b5d2391faa94a9b4735267b389f
7
- data.tar.gz: 6f667b5a28f8807980351345dc49ab4959a01b35a1d5463a2afb1851fa2718c55d53916248e2f92eca58d8f899466752d2bd51579747e8d74a8391ba43f3309f
6
+ metadata.gz: 718b82f083343c86f5dd9e3b7fcf82fab4b918a6911bd29c8b080b24cb6ca7e40e4f13f545ce16728789dd4e7f06d75dca6e1fb702ecde2ed2f3840951b1c356
7
+ data.tar.gz: 74b9d53b69b1b3008f4eeae03e1811fed2d35fbdbe4108c9f9e368fde96c98d84c8ce3a98b682bdf8669cdd136e4d8c43ad9855537f709f829acfe0f1ed6bcee
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2025-03-23 10:59:23 UTC using RuboCop version 1.74.0.
3
+ # on 2025-04-03 06:48:36 UTC using RuboCop version 1.75.1.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -13,7 +13,17 @@ Gemspec/DuplicatedAssignment:
13
13
  Exclude:
14
14
  - 'suma.gemspec'
15
15
 
16
- # Offense count: 43
16
+ # Offense count: 2
17
+ # This cop supports safe autocorrection (--autocorrect).
18
+ # Configuration parameters: AllowMultipleStyles, EnforcedHashRocketStyle, EnforcedColonStyle, EnforcedLastArgumentHashStyle.
19
+ # SupportedHashRocketStyles: key, separator, table
20
+ # SupportedColonStyles: key, separator, table
21
+ # SupportedLastArgumentHashStyles: always_inspect, always_ignore, ignore_implicit, ignore_explicit
22
+ Layout/HashAlignment:
23
+ Exclude:
24
+ - 'lib/suma/cli/validate.rb'
25
+
26
+ # Offense count: 55
17
27
  # This cop supports safe autocorrection (--autocorrect).
18
28
  # Configuration parameters: Max, AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, SplitStrings.
19
29
  # URISchemes: http, https
@@ -21,33 +31,38 @@ Layout/LineLength:
21
31
  Exclude:
22
32
  - 'lib/suma/cli.rb'
23
33
  - 'lib/suma/cli/build.rb'
24
- - 'lib/suma/cli/links.rb'
34
+ - 'lib/suma/cli/validate.rb'
35
+ - 'lib/suma/cli/validate_ascii.rb'
36
+ - 'lib/suma/cli/validate_links.rb'
25
37
  - 'lib/suma/collection_manifest.rb'
26
38
  - 'lib/suma/processor.rb'
27
39
  - 'lib/suma/schema_attachment.rb'
28
40
  - 'lib/suma/schema_collection.rb'
29
41
  - 'lib/suma/schema_document.rb'
30
42
  - 'lib/suma/thor_ext.rb'
43
+ - 'spec/suma/cli/validate_ascii_spec.rb'
31
44
  - 'suma.gemspec'
32
45
 
33
- # Offense count: 1
34
- Lint/DuplicateMethods:
46
+ # Offense count: 3
47
+ # This cop supports safe autocorrection (--autocorrect).
48
+ # Configuration parameters: AllowInHeredoc.
49
+ Layout/TrailingWhitespace:
35
50
  Exclude:
36
- - 'lib/suma/express_schema.rb'
51
+ - 'lib/suma/cli.rb'
52
+ - 'lib/suma/cli/validate.rb'
37
53
 
38
54
  # Offense count: 2
39
- # This cop supports safe autocorrection (--autocorrect).
40
- # Configuration parameters: AutoCorrect, AllowUnusedKeywordArguments, IgnoreEmptyMethods, IgnoreNotImplementedMethods, NotImplementedExceptions.
41
- # NotImplementedExceptions: NotImplementedError
42
- Lint/UnusedMethodArgument:
55
+ Lint/DuplicateMethods:
43
56
  Exclude:
44
- - 'lib/suma/cli.rb'
57
+ - 'lib/suma/cli/validate_ascii.rb'
58
+ - 'lib/suma/express_schema.rb'
45
59
 
46
- # Offense count: 11
60
+ # Offense count: 15
47
61
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
48
62
  Metrics/AbcSize:
49
63
  Exclude:
50
- - 'lib/suma/cli/links.rb'
64
+ - 'lib/suma/cli/validate_ascii.rb'
65
+ - 'lib/suma/cli/validate_links.rb'
51
66
  - 'lib/suma/schema_attachment.rb'
52
67
  - 'lib/suma/schema_document.rb'
53
68
  - 'lib/suma/thor_ext.rb'
@@ -58,28 +73,39 @@ Metrics/AbcSize:
58
73
  Metrics/BlockLength:
59
74
  Max: 64
60
75
 
61
- # Offense count: 2
76
+ # Offense count: 4
62
77
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
63
78
  Metrics/CyclomaticComplexity:
64
79
  Exclude:
65
- - 'lib/suma/cli/links.rb'
80
+ - 'lib/suma/cli/validate_ascii.rb'
81
+ - 'lib/suma/cli/validate_links.rb'
66
82
  - 'lib/suma/thor_ext.rb'
67
83
 
68
- # Offense count: 9
84
+ # Offense count: 21
69
85
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
70
86
  Metrics/MethodLength:
71
- Max: 72
87
+ Max: 107
72
88
 
73
- # Offense count: 3
89
+ # Offense count: 4
74
90
  # Configuration parameters: CountKeywordArgs, MaxOptionalParameters.
75
91
  Metrics/ParameterLists:
76
92
  Max: 6
77
93
 
78
- # Offense count: 1
94
+ # Offense count: 2
79
95
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
80
96
  Metrics/PerceivedComplexity:
81
97
  Exclude:
82
- - 'lib/suma/cli/links.rb'
98
+ - 'lib/suma/cli/validate_ascii.rb'
99
+ - 'lib/suma/cli/validate_links.rb'
100
+
101
+ # Offense count: 5
102
+ # Configuration parameters: EnforcedStyle, CheckMethodNames, CheckSymbols, AllowedIdentifiers, AllowedPatterns.
103
+ # SupportedStyles: snake_case, normalcase, non_integer
104
+ # AllowedIdentifiers: TLS1_1, TLS1_2, capture3, iso8601, rfc1123_date, rfc822, rfc2822, rfc3339, x86_64
105
+ Naming/VariableNumber:
106
+ Exclude:
107
+ - 'lib/suma/cli/validate_ascii.rb'
108
+ - 'spec/suma/cli/validate_ascii_spec.rb'
83
109
 
84
110
  # Offense count: 1
85
111
  # Configuration parameters: MinSize.
@@ -87,9 +113,14 @@ Performance/CollectionLiteralInLoop:
87
113
  Exclude:
88
114
  - 'spec/suma/cli_spec.rb'
89
115
 
90
- # Offense count: 1
116
+ # Offense count: 3
117
+ # Configuration parameters: CountAsOne.
118
+ RSpec/ExampleLength:
119
+ Max: 16
120
+
121
+ # Offense count: 5
91
122
  RSpec/MultipleExpectations:
92
- Max: 2
123
+ Max: 12
93
124
 
94
125
  # Offense count: 1
95
126
  # This cop supports safe autocorrection (--autocorrect).
@@ -97,4 +128,11 @@ RSpec/MultipleExpectations:
97
128
  # SupportedStyles: empty, nil, both
98
129
  Style/EmptyElse:
99
130
  Exclude:
100
- - 'lib/suma/cli/links.rb'
131
+ - 'lib/suma/cli/validate_links.rb'
132
+
133
+ # Offense count: 4
134
+ # This cop supports safe autocorrection (--autocorrect).
135
+ # Configuration parameters: MaxUnannotatedPlaceholdersAllowed, Mode, AllowedMethods, AllowedPatterns.
136
+ # SupportedStyles: annotated, template, unannotated
137
+ Style/FormatStringToken:
138
+ EnforcedStyle: unannotated
data/README.adoc CHANGED
@@ -37,9 +37,10 @@ $ gem install suma
37
37
  # Defaults to `suma help`
38
38
  $ suma
39
39
  Commands:
40
- suma build METANORMA_SITE_MANIFEST # Build collection specified in site manifest (`metanorma*.yml`)
41
- suma links SUBCOMMAND ...ARGS # Manage EXPRESS links
42
- suma help [COMMAND] # Describe available commands or one specific command
40
+ suma build METANORMA_SITE_MANIFEST # Build collection specified in site manifest (`metanorma*.yml`)
41
+ suma reformat EXPRESS_FILE_PATH # Reformat EXPRESS files
42
+ suma validate SUBCOMMAND ...ARGS # Validate express documents
43
+ suma help [COMMAND] # Describe available commands or one specific command
43
44
  ----
44
45
 
45
46
  === Build command
@@ -89,19 +90,61 @@ $ bundle exec suma build --no-compile metanorma-srl.yml
89
90
  All documents need to have a `schemas.yaml` in their document root that lists
90
91
  out which schemas the document includes.
91
92
 
92
- === Links command
93
+ === Reformat command
94
+
95
+ The `reformat` command provides utilities for reformatting EXPRESS files.
96
+
97
+ [source,sh]
98
+ ----
99
+ $ suma reformat EXPRESS_FILE_PATH [options]
100
+ ----
101
+
102
+ Parameters:
103
+
104
+ `EXPRESS_FILE_PATH`:: Path to an EXPRESS file or a folder containing EXPRESS
105
+ files
106
+
107
+ Options:
108
+
109
+ `--[no-]recursive`, `-r`:: Select EXPRESS files recursively based on the specified
110
+ folder path (default: false)
111
+
112
+ [example]
113
+ ====
114
+ .To reformat all EXPRESS files under the current directory recursively
115
+ [source,sh]
116
+ ----
117
+ $ bundle exec suma reformat `pwd` -r
118
+ ----
119
+ ====
120
+
121
+ This command:
122
+
123
+ * Loads the EXPRESS files specified in the `EXPRESS_FILE_PATH`
124
+ * Reformats and saves the loaded EXPRESS files
125
+
126
+ === Validate command
93
127
 
94
128
  ==== General
95
129
 
96
- The `links` command provides utilities for managing EXPRESS links.
130
+ The `validate` command groups various validation utilities for EXPRESS documents.
97
131
 
98
- ==== Extract and validate
132
+ [source,sh]
133
+ ----
134
+ $ suma validate SUBCOMMAND [options]
135
+ ----
136
+
137
+ Subcommands:
138
+ - `links` - Validate EXPRESS links
139
+ - `ascii` - Check for non-ASCII characters in EXPRESS files
99
140
 
100
- Extracts and validates EXPRESS links without creating intermediate files.
141
+ ==== Links subcommand
142
+
143
+ The `links` subcommand extracts and validates EXPRESS links without creating intermediate files.
101
144
 
102
145
  [source,sh]
103
146
  ----
104
- $ suma links extract_and_validate SCHEMAS_FILE DOCUMENTS_PATH [OUTPUT_FILE]
147
+ $ suma validate links SCHEMAS_FILE DOCUMENTS_PATH [OUTPUT_FILE]
105
148
  ----
106
149
 
107
150
  Parameters:
@@ -117,7 +160,7 @@ Parameters:
117
160
  .To validate EXPRESS links in documents
118
161
  [source,sh]
119
162
  ----
120
- $ bundle exec suma links extract_and_validate schemas-srl.yml documents validation_results.txt
163
+ $ bundle exec suma validate links schemas-srl.yml documents validation_results.txt
121
164
  ----
122
165
  ====
123
166
 
@@ -129,6 +172,120 @@ This command:
129
172
  * Writes validation results to the `OUTPUT_FILE`
130
173
  * Provides progress bars to track schema loading and link validation
131
174
 
175
+ ==== ASCII subcommand
176
+
177
+ The `ascii` subcommand detects non-ASCII characters in EXPRESS files, reports the exact lines on which they occur, and provides replacement suggestions.
178
+
179
+ [source,sh]
180
+ ----
181
+ $ suma validate ascii EXPRESS_FILE_PATH [options]
182
+ ----
183
+
184
+ Parameters:
185
+
186
+ `EXPRESS_FILE_PATH`:: Path to an EXPRESS file or a folder containing EXPRESS
187
+ files
188
+
189
+ Options:
190
+
191
+ `--[no-]recursive`, `-r`:: Select EXPRESS files recursively based on the specified
192
+ folder path (default: false)
193
+ `--[no-]yaml`, `-y`:: Output results in YAML format for machine processing (default: false)
194
+
195
+ [example]
196
+ ====
197
+ .To validate all EXPRESS files in a specific directory recursively
198
+ [source,sh]
199
+ ----
200
+ $ bundle exec suma validate ascii ../iso-10303/schemas -r
201
+ ----
202
+
203
+ .To validate and output results in YAML format
204
+ [source,sh]
205
+ ----
206
+ $ bundle exec suma validate ascii ../iso-10303/schemas -r -y > validation.yml
207
+ ----
208
+ ====
209
+
210
+ This command:
211
+
212
+ * Loads the EXPRESS files specified in the `EXPRESS_FILE_PATH`
213
+ * Scans each line for non-ASCII characters
214
+ * Reports detailed information about each violation, including:
215
+ ** Filename and line number
216
+ ** The exact line content
217
+ ** Visual indication of the non-ASCII sequence location
218
+ ** Character details with hexadecimal representation
219
+ * Provides specific replacement suggestions:
220
+ ** For math symbols: provides equivalent AsciiMath notation
221
+ ** For other non-ASCII characters: provides ISO 10303-11 encoded string literal format
222
+ * Displays a summary table showing:
223
+ ** File path (directory/filename)
224
+ ** Each non-ASCII symbol found
225
+ ** Suggested replacement for each symbol
226
+ ** Number of occurrences of each character
227
+ ** Totals row showing unique character count and overall occurrences
228
+ * Summarizes findings across all scanned files
229
+ * Optionally outputs structured data in YAML format with detailed occurrence information
230
+
231
+ Human-readable output format example:
232
+
233
+ [source,text]
234
+ ----
235
+ /path/to/file.exp:
236
+ Line 42, Column 15:
237
+ ENTITY some_entity (name: STRING, description: "résumé");
238
+ ^^^^^
239
+ "é" - Hex: 0xe9, UTF-8 bytes: 0xc3 0xa9
240
+ Replacement: ISO 10303-11: "000000E9"
241
+
242
+ "s" - Hex: 0x73, UTF-8 bytes: 0x73
243
+
244
+ "u" - Hex: 0x75, UTF-8 bytes: 0x75
245
+
246
+ "m" - Hex: 0x6d, UTF-8 bytes: 0x6d
247
+
248
+ "é" - Hex: 0xe9, UTF-8 bytes: 0xc3 0xa9
249
+ Replacement: ISO 10303-11: "000000E9"
250
+
251
+ Found 1 non-ASCII sequence(s) in file.exp
252
+
253
+ Summary:
254
+ Scanned 3 EXPRESS file(s)
255
+ Found 1 non-ASCII sequence(s) in 1 file(s)
256
+
257
+ +------------------+--------------------+-----------------------------+-------------+
258
+ | File | Symbol | Replacement | Occurrences |
259
+ +------------------+--------------------+-----------------------------+-------------+
260
+ | path/to/file.exp | "é" (0xe9) | ISO 10303-11: "000000E9" | 2 |
261
+ +------------------+--------------------+-----------------------------+-------------+
262
+ | TOTAL | 1 unique | | 2 |
263
+ +------------------+--------------------+-----------------------------+-------------+
264
+ ----
265
+
266
+ ===== Japanese Character Example
267
+
268
+ For Japanese characters like 神戸 (Kobe), the command will provide ISO 10303-11 encoded string literal replacements:
269
+
270
+ [source,text]
271
+ ----
272
+ "神" - Hex: 0x795e, UTF-8 bytes: 0xe7 0xa5 0x9e
273
+ Replacement: ISO 10303-11: "0000795E"
274
+
275
+ "戸" - Hex: 0x6238, UTF-8 bytes: 0xe6 0x88 0xb8
276
+ Replacement: ISO 10303-11: "00006238"
277
+ ----
278
+
279
+ ===== Math Symbol Example
280
+
281
+ For mathematical symbols, the command will provide equivalent AsciiMath notation:
282
+
283
+ [source,text]
284
+ ----
285
+ "×" - Hex: 0xd7, UTF-8 bytes: 0xc3 0x97
286
+ Replacement: AsciiMath: xx
287
+ ----
288
+
132
289
 
133
290
  == Usage: Ruby
134
291
 
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "thor"
4
+ require_relative "../thor_ext"
5
+
6
+ module Suma
7
+ module Cli
8
+ # Reformat command for reformatting EXPRESS files
9
class Reformat < Thor
  desc "reformat EXPRESS_FILE_PATH",
       "Reformat EXPRESS files"
  option :recursive, type: :boolean, default: false, aliases: "-r",
                     desc: "Reformat EXPRESS files under the specified " \
                           "path recursively"

  # Reformats a single EXPRESS file, or every `.exp` file found under a
  # directory (recursively when the `recursive` option is set).
  #
  # @param express_file_path [String] path to an `.exp` file or a directory
  # @raise [Errno::ENOENT] when the path does not exist, or when no `.exp`
  #   files are found under it
  # @raise [ArgumentError] when a file without the `.exp` extension is given
  def reformat(express_file_path) # rubocop:disable Metrics/AbcSize
    # The existence check must come before the file/directory dispatch:
    # `File.file?` is already false for a missing path, so a check nested
    # inside that branch could never fire.
    unless File.exist?(express_file_path)
      raise Errno::ENOENT, "Specified EXPRESS file " \
                           "`#{express_file_path}` not found."
    end

    if File.file?(express_file_path)
      if File.extname(express_file_path) != ".exp"
        raise ArgumentError, "Specified file `#{express_file_path}` is " \
                             "not an EXPRESS file."
      end

      exp_files = [express_file_path]
    elsif options[:recursive]
      exp_files = Dir.glob("#{express_file_path}/**/*.exp")
    else
      exp_files = Dir.glob("#{express_file_path}/*.exp")
    end

    if exp_files.empty?
      raise Errno::ENOENT, "No EXPRESS files found in " \
                           "`#{express_file_path}`."
    end

    run(exp_files)
  end

  private

  # Reformats each of the given EXPRESS files in turn.
  def run(exp_files)
    exp_files.each do |exp_file|
      reformat_exp(exp_file)
    end
  end

  # Moves every `(*"... \n*)` comment block of +file+ to the end of the
  # file. Files without such blocks are left untouched, and the file is
  # not rewritten when the result differs from the original only in the
  # number of consecutive newlines.
  def reformat_exp(file) # rubocop:disable Metrics/AbcSize
    file_content = File.read(file)

    # Extract all comments between '(*"' and '\n*)'.
    # Anchoring on the '\n*)' terminator avoids cutting a block short when
    # it contains an inline '(*text*)' comment.
    comments = file_content.scan(/\(\*"(.*?)\n\*\)/m).map(&:first)
    return if comments.empty?

    content_without_comments = file_content.gsub(/\(\*".*?\n\*\)/m, "")

    # Collapse runs of blank lines left behind by the removed blocks
    new_content = content_without_comments.gsub(/(\n\n+)/, "\n\n")
    # Re-wrap each captured body in its '(*"' ... '\n*)' delimiters
    new_comments = comments.map { |c| "(*\"#{c}\n*)" }.join("\n\n")
    # Append the comments to the end of the file
    new_content = "#{new_content}\n\n#{new_comments}\n"

    # Compare with newline runs normalised on both sides; if only the
    # amount of vertical whitespace changed, skip modifying the file
    if file_content.gsub(/(\n+)/, "\n") == new_content.gsub(/(\n+)/, "\n")
      puts "No changes made to #{file}"
      return
    end

    update_exp(file, new_content)
  end

  # Overwrites +file+ in place with the reformatted +content+.
  def update_exp(file, content)
    File.write(file, content)
    puts "Reformatted EXPRESS file and saved to #{file}"
  end
end
88
+ end
89
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "thor"
4
+
5
+ module Suma
6
+ module Cli
7
+ # Main validate command that groups the validation subcommands
8
class Validate < Thor
  desc "links SCHEMAS_FILE DOCUMENTS_PATH [OUTPUT_FILE]",
       "Extract and validate express links without creating intermediate file"
  # Delegates to ValidateLinks#extract_and_validate, passing the positional
  # arguments through unchanged. The implementation file is loaded lazily.
  def links(*args)
    require_relative "validate_links"

    Cli::ValidateLinks.new.extract_and_validate(*args)
  end

  desc "ascii EXPRESS_FILE_PATH",
       "Validate EXPRESS files for ASCII-only content"
  option :recursive, type: :boolean, default: false, aliases: "-r",
                     desc: "Validate EXPRESS files under the specified path recursively"
  option :yaml, type: :boolean, default: false, aliases: "-y",
                desc: "Output results in YAML format"
  # Delegates to ValidateAscii#validate_ascii, handing this command's
  # parsed options to the delegate before invoking it.
  def ascii(express_file_path)
    require_relative "validate_ascii"

    delegate = Cli::ValidateAscii.new
    delegate.options = options
    delegate.validate_ascii(express_file_path)
  end
end
33
+ end
34
+ end
@@ -0,0 +1,519 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "thor"
4
+ require "yaml"
5
+ require "terminal-table"
6
+ require "plurimath"
7
+ require "set" # For using Set in unique_character_count
8
+ require_relative "../thor_ext"
9
+
10
+ module Suma
11
+ module Cli
12
+ # Represents a non-ASCII character with its details and replacement
13
class NonAsciiCharacter
  attr_reader :char, :hex, :utf8, :is_math, :replacement,
              :replacement_type, :occurrences

  # Stores the character, its pre-formatted hex / UTF-8 descriptions, the
  # suggested replacement and its kind; starts with no recorded occurrences.
  def initialize(char, hex, utf8, is_math, replacement, replacement_type)
    @char, @hex, @utf8 = char, hex, utf8
    @is_math = is_math
    @replacement = replacement
    @replacement_type = replacement_type
    @occurrences = []
  end

  # Records one place where the character was seen.
  def add_occurrence(line_number, column, line)
    @occurrences.push({ line_number: line_number, column: column, line: line })
  end

  # Replacement suggestion prefixed with the notation it is written in.
  def replacement_text
    if @is_math
      "AsciiMath: #{@replacement}"
    else
      "ISO 10303-11: #{@replacement}"
    end
  end

  # Number of recorded occurrences.
  def occurrence_count
    @occurrences.length
  end

  # Hash form of the character and its occurrences.
  def to_h
    summary = {}
    summary[:character] = @char
    summary[:hex] = @hex
    summary[:utf8] = @utf8
    summary[:is_math] = @is_math
    summary[:replacement_type] = @replacement_type
    summary[:replacement] = @replacement
    summary[:occurrence_count] = occurrence_count
    summary[:occurrences] = @occurrences
    summary
  end
end
56
+
57
+ # Represents all non-ASCII characters in a file
58
class FileViolations
  # `unique_characters` is deliberately not listed here: it is defined
  # explicitly below, and declaring it as a reader as well would define
  # the method twice.
  attr_reader :path, :filename, :directory, :violations

  # @param file_path [String] path of the scanned file, as given by the caller
  def initialize(file_path)
    @path = file_path
    @filename = File.basename(file_path)
    @directory = File.dirname(file_path)
    @characters = {} # Map of characters to NonAsciiCharacter objects
    @violations = [] # List of violations (line, column, etc.)
  end

  # Records one non-ASCII sequence and registers an occurrence for every
  # character described in +char_details+.
  #
  # @param line_number [Integer] line on which the sequence was found
  # @param column [Integer] column of the sequence within the line
  # @param match [String] the matched non-ASCII sequence
  # @param char_details [Array<Hash>] one hash per character, read via the
  #   keys :char, :hex, :utf8, :is_math, :replacement and :replacement_type
  # @param line [String] content of the line
  def add_violation(line_number, column, match, char_details, line)
    @violations << {
      line_number: line_number,
      column: column,
      match: match,
      char_details: char_details,
      line: line,
    }

    char_details.each do |detail|
      char = detail[:char]
      # Create the per-character record the first time the character is seen
      @characters[char] ||= NonAsciiCharacter.new(
        char,
        detail[:hex],
        detail[:utf8],
        detail[:is_math],
        detail[:replacement],
        detail[:replacement_type],
      )

      @characters[char].add_occurrence(line_number, column, line)
    end
  end

  # Number of non-ASCII sequences recorded for this file.
  def violation_count
    @violations.size
  end

  # The NonAsciiCharacter records, one per distinct character.
  def unique_characters
    @characters.values
  end

  # Short path of the form "<parent directory name>/<file name>".
  def display_path
    "#{File.basename(@directory)}/#{@filename}"
  end

  # Absolute form of the path the object was created with.
  def full_path
    File.expand_path(@path)
  end

  # Hash form: display path, violation count and per-character details.
  def to_h
    {
      file: display_path,
      count: violation_count,
      non_ascii_characters: unique_characters.map(&:to_h),
    }
  end
end
122
+
123
+ # Collection of all violations across multiple files
124
+ class NonAsciiViolationCollection
125
+ attr_reader :file_violations, :total_files
126
+
127
+ def initialize
128
+ @file_violations = {} # Map of file paths to FileViolations objects
129
+ @total_files = 0
130
+ @unicode_to_asciimath = nil
131
+ end
132
+
133
+ def process_file(file)
134
+ @total_files += 1
135
+
136
+ # Initialize the mapping once
137
+ @unicode_to_asciimath ||= build_unicode_to_asciimath_map
138
+
139
+ file_violations = process_file_violations(file)
140
+ return if file_violations.violations.empty?
141
+
142
+ @file_violations[file] = file_violations
143
+ end
144
+
145
+ def files_with_violations
146
+ @file_violations.size
147
+ end
148
+
149
+ def total_violations
150
+ @file_violations.values.sum(&:violation_count)
151
+ end
152
+
153
# Number of distinct non-ASCII characters seen across all files; a
# character that appears in several files is counted once.
def unique_character_count
  @file_violations
    .each_value
    .flat_map { |per_file| per_file.unique_characters.map(&:char) }
    .uniq
    .size
end
163
+
164
+ def total_occurrence_count
165
+ # Sum all occurrences of all characters across all files
166
+ @file_violations.values.sum do |file_violation|
167
+ file_violation.unique_characters.sum(&:occurrence_count)
168
+ end
169
+ end
170
+
171
+ def to_yaml_data
172
+ {
173
+ summary: {
174
+ total_files: @total_files,
175
+ files_with_violations: files_with_violations,
176
+ total_violations: total_violations,
177
+ total_unique_characters: unique_character_count,
178
+ total_occurrences: total_occurrence_count,
179
+ },
180
+ violations: @file_violations.transform_keys do |k|
181
+ File.expand_path(k)
182
+ end.transform_values(&:to_h),
183
+ }
184
+ end
185
+
186
+ def print_text_output
187
+ return if @file_violations.empty?
188
+
189
+ # Print each file's violations
190
+ @file_violations.each_value do |file_violation|
191
+ puts "\n#{file_violation.display_path}:"
192
+
193
+ file_violation.violations.each do |v|
194
+ puts " Line #{v[:line_number]}, Column #{v[:column]}:"
195
+ puts " #{v[:line]}"
196
+ puts " #{' ' * v[:column]}#{'^' * v[:match].length} Non-ASCII sequence"
197
+
198
+ v[:char_details].each do |cd|
199
+ character = file_violation.unique_characters.find do |c|
200
+ c.char == cd[:char]
201
+ end
202
+ next unless character
203
+
204
+ puts " \"#{cd[:char]}\" - Hex: #{cd[:hex]}, UTF-8 bytes: #{cd[:utf8]}"
205
+ puts " Replacement: #{character.replacement_text}"
206
+ end
207
+ puts ""
208
+ end
209
+
210
+ puts " Found #{file_violation.violation_count} non-ASCII sequence(s) in #{file_violation.filename}\n"
211
+ end
212
+
213
+ # Print summary
214
+ puts "\nSummary:"
215
+ puts " Scanned #{@total_files} EXPRESS file(s)"
216
+ puts " Found #{total_violations} non-ASCII sequence(s) in #{files_with_violations} file(s)"
217
+ end
218
+
219
+ def print_table_output
220
+ return if @file_violations.empty?
221
+
222
+ table = ::Terminal::Table.new(
223
+ title: "Non-ASCII Characters Summary",
224
+ headings: ["File", "Symbol", "Replacement", "Occurrences"],
225
+ )
226
+
227
+ total_occurrences = 0
228
+
229
+ @file_violations.each_value do |file_violation|
230
+ file_violation.unique_characters.each do |character|
231
+ occurrence_count = character.occurrence_count
232
+ total_occurrences += occurrence_count
233
+
234
+ table.add_row [
235
+ file_violation.display_path,
236
+ "\"#{character.char}\" (#{character.hex})",
237
+ character.replacement_text,
238
+ occurrence_count,
239
+ ]
240
+ end
241
+ end
242
+
243
+ # Add a separator and total row
244
+ table.add_separator
245
+ table.add_row [
246
+ "TOTAL",
247
+ "#{unique_character_count} unique",
248
+ "",
249
+ total_occurrences,
250
+ ]
251
+
252
+ puts "\n#{table}\n"
253
+ end
254
+
255
+ private
256
+
257
# Scans +file+ line by line (read as UTF-8) and collects every run of
# non-ASCII characters into a FileViolations object.
#
# @param file [String] path of the file to scan
# @return [FileViolations] the collected violations (possibly none)
def process_file_violations(file)
  file_violations = FileViolations.new(file)

  File.foreach(file, encoding: "UTF-8").with_index(1) do |raw_line, line_number|
    line = raw_line.chomp

    # Fast path: nothing to report on a pure-ASCII line
    next if line.ascii_only?

    # Find all non-ASCII sequences
    line.scan(/[^\x00-\x7F]+/) do |match|
      # Take the column from this match's own MatchData. Searching the
      # line for the matched text would return the FIRST occurrence, so a
      # sequence repeated on the same line would always report the column
      # of its first appearance.
      column = Regexp.last_match.begin(0)

      # Describe each character in the sequence
      char_details = match.chars.filter_map do |c|
        process_non_ascii_char(c)
      end

      # Skip if no non-ASCII characters found
      next if char_details.empty?

      file_violations.add_violation(line_number, column, match,
                                    char_details, line)
    end
  end

  file_violations
end
288
+
289
# Builds the detail hash for a single character, or returns nil when the
# character is ASCII. A character found in the Unicode-to-AsciiMath map
# gets its AsciiMath replacement; any other character gets the
# ISO 10303-11 encoded form.
def process_non_ascii_char(char)
  code_point = char.ord
  return nil if code_point <= 0x7F

  utf8_bytes = code_point.chr(Encoding::UTF_8).bytes
  details = {
    char: char,
    hex: "0x#{code_point.to_s(16)}",
    utf8: utf8_bytes.map { |b| "0x#{b.to_s(16)}" }.join(" "),
  }

  math_replacement = @unicode_to_asciimath[char]
  if math_replacement
    details.merge(
      is_math: true,
      replacement: math_replacement,
      replacement_type: "asciimath",
    )
  else
    details.merge(
      is_math: false,
      replacement: encode_iso_10303_11(char),
      replacement_type: "iso-10303-11",
    )
  end
end
321
+
322
# Returns the character's code point as eight upper-case hexadecimal
# digits wrapped in double quotes, e.g. "00000041" for 'A'.
# Splitting the code point into four octets and printing each as two hex
# digits yields the same eight digits, so one zero-padded field covers
# every code point.
def encode_iso_10303_11(char)
  format('"%08X"', char.ord)
end
338
+
339
+ def build_unicode_to_asciimath_map
340
+ # Start with a pre-defined mapping of common math symbols
341
+ unicode_to_asciimath = {
342
+ # Greek letters
343
+ "α" => "alpha",
344
+ "β" => "beta",
345
+ "γ" => "gamma",
346
+ "Γ" => "Gamma",
347
+ "δ" => "delta",
348
+ "Δ" => "Delta",
349
+ "ε" => "epsilon",
350
+ "ζ" => "zeta",
351
+ "η" => "eta",
352
+ "θ" => "theta",
353
+ "Θ" => "Theta",
354
+ "ι" => "iota",
355
+ "κ" => "kappa",
356
+ "λ" => "lambda",
357
+ "Λ" => "Lambda",
358
+ "μ" => "mu",
359
+ "ν" => "nu",
360
+ "ξ" => "xi",
361
+ "Ξ" => "Xi",
362
+ "π" => "pi",
363
+ "Π" => "Pi",
364
+ "ρ" => "rho",
365
+ "σ" => "sigma",
366
+ "Σ" => "Sigma",
367
+ "τ" => "tau",
368
+ "υ" => "upsilon",
369
+ "φ" => "phi",
370
+ "Φ" => "Phi",
371
+ "χ" => "chi",
372
+ "ψ" => "psi",
373
+ "Ψ" => "Psi",
374
+ "ω" => "omega",
375
+ "Ω" => "Omega",
376
+
377
+ # Math operators
378
+ "×" => "xx",
379
+ "÷" => "div",
380
+ "±" => "pm",
381
+ "∓" => "mp",
382
+ "∞" => "oo",
383
+ "≤" => "le",
384
+ "≥" => "ge",
385
+ "≠" => "ne",
386
+ "≈" => "~~",
387
+ "≅" => "cong",
388
+ "≡" => "equiv",
389
+ "∈" => "in",
390
+ "∉" => "notin",
391
+ "⊂" => "subset",
392
+ "⊃" => "supset",
393
+ "∩" => "cap",
394
+ "∪" => "cup",
395
+ "∧" => "and",
396
+ "∨" => "or",
397
+ "¬" => "neg",
398
+ "∀" => "forall",
399
+ "∃" => "exists",
400
+ "∄" => "nexists",
401
+ "∇" => "grad",
402
+ "∂" => "del",
403
+ "∑" => "sum",
404
+ "∏" => "prod",
405
+ "∫" => "int",
406
+ "∮" => "oint",
407
+ "√" => "sqrt",
408
+ "⊥" => "perp",
409
+ "‖" => "norm",
410
+ "→" => "rarr",
411
+ "←" => "larr",
412
+ "↔" => "harr",
413
+ "⇒" => "rArr",
414
+ "⇐" => "lArr",
415
+ "⇔" => "hArr",
416
+ }
417
+
418
+ # Augment with symbols from Plurimath
419
+ begin
420
+ # Get all symbols supported by AsciiMath
421
+ Plurimath::Utility.symbols_files.each do |symbol_class|
422
+ symbol = symbol_class.new
423
+
424
+ # Get the Unicode and AsciiMath representations
425
+ unicodes = symbol.to_unicodemath
426
+ asciimaths = symbol.to_asciimath
427
+
428
+ # Skip if either representation is missing
429
+ next unless unicodes.is_a?(Array) && asciimaths.is_a?(Array)
430
+ # Skip if empty arrays
431
+ next if unicodes.empty? || asciimaths.empty?
432
+
433
+ unicodes.each_with_index do |unicode, index|
434
+ # Skip if we're beyond available AsciiMath representations
435
+ next if index >= asciimaths.length
436
+ # Skip empty string values
437
+ next if unicode.to_s.empty?
438
+
439
+ # Map each character to its AsciiMath equivalent
440
+ unicode.to_s.chars.each do |char|
441
+ # Only add if not already in our mapping
442
+ unicode_to_asciimath[char] ||= asciimaths[index]
443
+ end
444
+ end
445
+ rescue StandardError => e
446
+ # Skip this symbol class if there's an error
447
+ puts "Warning: Error processing symbol class #{symbol_class}: #{e.message}" if $DEBUG
448
+ end
449
+ rescue StandardError => e
450
+ # Continue even if Plurimath integration fails
451
+ puts "Warning: Error loading Plurimath symbols: #{e.message}" if $DEBUG
452
+ end
453
+
454
+ unicode_to_asciimath
455
+ end
456
+ end
457
+
458
+ # ValidateAscii command for checking EXPRESS files for non-ASCII characters
459
class ValidateAscii < Thor
  desc "validate-ascii EXPRESS_FILE_PATH",
       "Validate EXPRESS files for ASCII-only content"
  option :recursive, type: :boolean, default: false, aliases: "-r",
                     desc: "Validate EXPRESS files under the specified " \
                           "path recursively"
  option :yaml, type: :boolean, default: false, aliases: "-y",
                desc: "Output results in YAML format"

  # Checks a single EXPRESS file, or every `.exp` file found under a
  # directory (recursively when the `recursive` option is set), for
  # non-ASCII characters and prints the findings.
  #
  # @param express_file_path [String] path to an `.exp` file or a directory
  # @raise [Errno::ENOENT] when the path does not exist, or when no `.exp`
  #   files are found under it
  # @raise [ArgumentError] when a file without the `.exp` extension is given
  def validate_ascii(express_file_path) # rubocop:disable Metrics/AbcSize
    # The existence check must come before the file/directory dispatch:
    # `File.file?` is already false for a missing path, so a check nested
    # inside that branch could never fire.
    unless File.exist?(express_file_path)
      raise Errno::ENOENT, "Specified EXPRESS file " \
                           "`#{express_file_path}` not found."
    end

    if File.file?(express_file_path)
      if File.extname(express_file_path) != ".exp"
        raise ArgumentError, "Specified file `#{express_file_path}` is " \
                             "not an EXPRESS file."
      end

      exp_files = [express_file_path]
    elsif options[:recursive]
      # Expand first so that relative paths work with the glob pattern
      base_path = File.expand_path(express_file_path)
      exp_files = Dir.glob("#{base_path}/**/*.exp")
    else
      # Non-recursive: only files directly inside the directory
      base_path = File.expand_path(express_file_path)
      exp_files = Dir.glob("#{base_path}/*.exp")
    end

    if exp_files.empty?
      raise Errno::ENOENT, "No EXPRESS files found in " \
                           "`#{express_file_path}`."
    end

    run(exp_files)
  end

  private

  # Processes every file into one NonAsciiViolationCollection, then prints
  # either its YAML form or the text report; the summary table is printed
  # only when at least one file has violations.
  def run(exp_files)
    collection = NonAsciiViolationCollection.new

    exp_files.each do |exp_file|
      collection.process_file(exp_file)
    end

    if options[:yaml]
      puts collection.to_yaml_data.to_yaml
    else
      collection.print_text_output
      collection.print_table_output if collection.files_with_violations.positive?
    end
  end
end
518
+ end
519
+ end
@@ -5,8 +5,8 @@ require_relative "../utils"
5
5
 
6
6
  module Suma
7
7
  module Cli
8
- # Links command for managing EXPRESS links
9
- class Links < Thor
8
+ # ValidateLinks command for managing EXPRESS links
9
+ class ValidateLinks < Thor
10
10
  desc "extract_and_validate SCHEMAS_FILE DOCUMENTS_PATH [OUTPUT_FILE]",
11
11
  "Extract and validate express links without creating intermediate file"
12
12
  def extract_and_validate(schemas_file = "schemas-srl.yml",
data/lib/suma/cli.rb CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "thor"
4
4
  require_relative "thor_ext"
5
+ require_relative "cli/validate"
5
6
 
6
7
  module Suma
7
8
  module Cli
@@ -22,10 +23,21 @@ module Suma
22
23
  Cli::Build.start
23
24
  end
24
25
 
25
- desc "links SUBCOMMAND ...ARGS", "Manage EXPRESS links"
26
- def links(*_args)
27
- require_relative "cli/links"
28
- Cli::Links.start
26
+ desc "reformat EXPRESS_FILE_PATH",
27
+ "Reformat EXPRESS files"
28
+ option :recursive, type: :boolean, default: false, aliases: "-r",
29
+ desc: "Reformat EXPRESS files under the specified " \
30
+ "path recursively"
31
+ def reformat(_express_file_path)
32
+ require_relative "cli/reformat"
33
+ Cli::Reformat.start
34
+ end
35
+
36
+ desc "validate SUBCOMMAND ...ARGS", "Validate express documents"
37
+ subcommand "validate", Cli::Validate
38
+
39
+ def self.exit_on_failure?
40
+ true
29
41
  end
30
42
  end
31
43
  end
data/lib/suma/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Suma
4
- VERSION = "0.1.10"
4
+ VERSION = "0.1.12"
5
5
  end
data/suma.gemspec CHANGED
@@ -33,10 +33,12 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
33
33
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
34
34
  spec.require_paths = ["lib"]
35
35
 
36
- spec.add_dependency "expressir", "~> 2.1.16"
36
+ spec.add_dependency "expressir", "~> 2.1"
37
37
  spec.add_dependency "lutaml-model", "~> 0.7"
38
38
  spec.add_dependency "metanorma-cli"
39
+ spec.add_dependency "plurimath"
39
40
  spec.add_dependency "ruby-progressbar"
41
+ spec.add_dependency "terminal-table", "~> 3.0"
40
42
  spec.add_dependency "thor", ">= 0.20"
41
43
  spec.metadata["rubygems_mfa_required"] = "true"
42
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: suma
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.10
4
+ version: 0.1.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-03-23 00:00:00.000000000 Z
11
+ date: 2025-04-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: expressir
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 2.1.16
19
+ version: '2.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 2.1.16
26
+ version: '2.1'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: lutaml-model
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: plurimath
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: ruby-progressbar
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -66,6 +80,20 @@ dependencies:
66
80
  - - ">="
67
81
  - !ruby/object:Gem::Version
68
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: terminal-table
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '3.0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '3.0'
69
97
  - !ruby/object:Gem::Dependency
70
98
  name: thor
71
99
  requirement: !ruby/object:Gem::Requirement
@@ -106,7 +134,10 @@ files:
106
134
  - lib/suma.rb
107
135
  - lib/suma/cli.rb
108
136
  - lib/suma/cli/build.rb
109
- - lib/suma/cli/links.rb
137
+ - lib/suma/cli/reformat.rb
138
+ - lib/suma/cli/validate.rb
139
+ - lib/suma/cli/validate_ascii.rb
140
+ - lib/suma/cli/validate_links.rb
110
141
  - lib/suma/collection_config.rb
111
142
  - lib/suma/collection_manifest.rb
112
143
  - lib/suma/express_schema.rb