ruby_unicode_prop 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 995fd4cfd7ebc81e4621b58517e94c827cb76e06b40d99189537558813c060c4
4
+ data.tar.gz: 402b13940827cd2b4538c2f8ac83bda464c728eee6f6fbd774593c802c55d857
5
+ SHA512:
6
+ metadata.gz: bf2a92ce22fb744822bd6a687c9c5874a8ee22c7dd72dcbadfd830d133f289615b8573107139c6f8de9a659500deda0d85454578d90608d284eef65f1ae38dce
7
+ data.tar.gz: 6dafbbabc31fe0133fd08815fd71b6d086697cfbab3180b3678ed5997c834de4a531f8f083053c5da8541ec031e33f1f8d01069540ee088f43af5319f63c5090
data/.gitignore ADDED
@@ -0,0 +1,51 @@
1
+ # See https://help.github.com/articles/ignoring-files for more about ignoring files.
2
+ #
3
+ # If you find yourself ignoring temporary files generated by your text editor
4
+ # or operating system, you probably want to add a global ignore instead:
5
+ # git config --global core.excludesfile '~/.gitignore_global'
6
+
7
+ # Ignore bundler config.
8
+ /.bundle
9
+ /vendor/bundle
10
+
11
+ # Ignore all logfiles and tempfiles.
12
+ /log/*
13
+ /tmp/*
14
+ !/log/.keep
15
+ !/tmp/.keep
16
+
17
+ .rbenv-version
18
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
19
+ .rvmrc
20
+
21
+ /node_modules
22
+ /yarn-error.log
23
+
24
+ .byebug_history
25
+
26
+ *.[oa]
27
+ *.so
28
+ *~
29
+ *.nogem
30
+ *nogem.*
31
+ *.bak
32
+ *.BAK
33
+ *.backup
34
+ *.org
35
+ *.orig
36
+ *.elc
37
+ *.pyc
38
+ \#*\#
39
+
40
+ # Elastic Beanstalk Files
41
+ .elasticbeanstalk/*
42
+ !.elasticbeanstalk/*.cfg.yml
43
+ !.elasticbeanstalk/*.global.yml
44
+
45
+ # yard
46
+ *.yardoc
47
+
48
+ # Ruby Gem doc
49
+ *.gem
50
+ doc/*
51
+
data/ChangeLog ADDED
@@ -0,0 +1,5 @@
1
+ -----
2
+ (Version: 1.0)
3
+ 2019-10-26 Masa Sakano
4
+
5
+ * Initial commit (I think pretty complete).
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012-2018 Scott Chacon and others
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Makefile ADDED
@@ -0,0 +1,23 @@
1
+ ALL =
2
+
3
+ objs =
4
+
5
+ .SUFFIXES: .so .o .c .f
6
+
7
+ #.o.so:
8
+ # ${LD} ${LFLAGS} -o $@ $< ${LINK_LIB}
9
+
10
+ all: ${ALL}
11
+
12
+
13
+ .PHONY: all clean test doc
14
+ clean:
15
+ $(RM) bin/*~
16
+
17
+ ## You may need RUBYLIB=`pwd`/lib:$RUBYLIB
18
+ test:
19
+ rake test
20
+
21
+ doc:
22
+ yard doc
23
+
data/README.en.rdoc ADDED
@@ -0,0 +1,153 @@
1
+
2
+ = Command to print out Unicode characters that satisfy expressions for Ruby Regexp
3
+
4
+ == Summary
5
+
6
+ This package contains a standalone Ruby (executable) source file
7
+ +bin/ruby_unicode_prop+ - a command to be used from terminals etc, and it outputs (to STDOUT)
8
+ Unicode characters and/or their hexadecimal codepoints that satisfy one or more given expressions
9
+ defined in Regexp in Ruby. Specifically, they are +\p{XXX}+ -type expressions (e.g., +\p{Katakana}+)
10
+ for Unicode, as well as +[[:blank:]]+ -type expressions for POSIX representation.
11
+
12
+ Some supplementary files are found in the top and +test+ directories,
13
+ none of which is essential to run the command.
14
+
15
+ == Description
16
+
17
+ The help doc is viewable with the +-h+ (or +--help+) option, which shows all the basics:
18
+
19
+ % /YOUR/INSTALLED/PATH/ruby_unicode_prop -h
20
+ USAGE: ruby_unicode_prop [options] Property1 [Property2, ...]
21
+ Print all the characters and/or their hex-codepoints that have
22
+ the given "Unicode property" used in Ruby Regexp like \p{Currency_Symbol}
23
+ (or POSIX expression like [[:blank:]] if -p option is given).
24
+
25
+ Options:
26
+ -c, --[no-]without-codepoint Print characters only? (Def: false)
27
+ -n, --[no-]without-char Print codepoints only? (Def: false)
28
+ -d, --delimiter=CHAR Delimeter in output.
29
+ -l, --[no-]lowercase Lower cases alphabets are used for Hex in codepoints (Def: false)
30
+ -p, --[no-]posix Use POSIX expression instead of Unicode (Def: false)
31
+ --[no-]list-property Print all the Ruby Unicode properties and exit.
32
+
33
+ Note1: Delimeter means one
34
+ (1) between multiple characters and codepoints if either of -n or -c is specified
35
+ (Default: Null for -c (characters only) and a new line for -n.
36
+ (2) between the number and character of each pair if both are specified
37
+ (Def: a whitespace), whereas the delimeter between pairs is always a newline.
38
+ To specify a newline as a delimiter, give 'NL'
39
+ Note2: Properties differ for '-p', 'ascii' in POSIX and 'ASCII' in Unicode.
40
+
41
+ The reference file (used in the +--list-property+ option) is dynamically retrieved
42
+ from https://github.com/k-takata/Onigmo/blob/master/doc/UnicodeProps.txt
43
+ The definition file in the Ruby source tree is at +/enc/unicode/name2ctype.h+
44
+
45
+ === Limitations
46
+
47
+ The output of this command is generated by the Ruby it runs, and hence
48
+ is fully consistent with Regexp matching results with the same
49
+ property names in any applications when you run the same Ruby. That
50
+ also means the output can depend on the version of the Ruby you run,
51
+ because the unicode table has expanded over the years (such as emojis)
52
+ and it will keep doing so.
53
+
54
+ Currently, the searches by this command are limited up to the second
55
+ Supplementary Plane
56
+ ({Supplementary Ideographic Plane}[https://en.wikipedia.org/wiki/Plane_(Unicode)#Supplementary_Ideographic_Plane]), which should be enough in practice
57
+ in most cases now in 2019 and perhaps will be so for some time.
58
+
59
+ In fact, in many practical cases, searching over
60
+ only the {Basic Multilingual Plane}[https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane] (up to 0xFFFF) is probably
61
+ sufficient, though it seems the second Supplementary Plane does
62
+ include groups of CJK characters some of which are still in use occasionally in
63
+ modern days. The maximum codepoint to search for is defined in the constant
64
+ +MAX_UNICODE_HEX+ near the beginning of the source code. If you set
65
+ it to a lower value, that can speed up the processing considerably,
66
+ potentially noticeably.
67
+
68
+ == Examples
69
+
70
+ A typical example is as follows:
71
+
72
+ % bin/ruby_unicode_prop Greek
73
+ 0370 Ͱ
74
+ 0371 ͱ
75
+ 0372 Ͳ
76
+ ……(snipped)……
77
+ 0391 Α
78
+ 0392 Β
79
+ 0393 Γ
80
+ 0394 Δ
81
+ ……(snipped)
82
+
83
+ For some, POSIX (bracket) expressions are supported:
84
+
85
+ % bin/ruby_unicode_prop -p -d '___ ' punct
86
+ 0021___ !
87
+ 0022___ "
88
+ 0023___ #
89
+ 0024___ $
90
+ 0025___ %
91
+ 0026___ &
92
+ 0027___ '
93
+ 0028___ (
94
+ 0029___ )
95
+ 002A___ *
96
+ 002B___ +
97
+ ……(snipped)
98
+
99
+ Note the corresponding property name for Unicode +\p{}+ (a backslash
100
+ followed by +p+ and curly brackets) is +Punct+ — it is capitalized,
101
+ compared with the POSIX expression name.
102
+
103
+ Or, you can specify multiple properties. The order of the argument
104
+ does not matter and the result is always in the order of the
105
+ codepoints. No duplication is produced, even if some of the
106
+ specified properties have overlapped ranges of characters. An example is,
107
+
108
+ % bin/ruby_unicode_prop -c Number Terminal_Punctuation
109
+ !,.0123456789:;?²³¹¼½¾;……(snipped)
110
+ % bin/ruby_unicode_prop -c Number Terminal_Punctuation Close_Punctuation
111
+ !),.0123456789:;?]}²³¹¼½¾;……(snipped)
112
+
113
+ == Install
114
+
115
+ This script requires {Ruby}[http://www.ruby-lang.org] Version 2.0
116
+ or above.
117
+
118
+ If you install it as the standard Ruby Gem package, the executable +bin/ruby_unicode_prop+
119
+ should be located automatically in your command-line search path.
120
+
121
+ If not, place (copy) it in any of your command-line search paths. It
122
+ is a self-contained single file and does not need any external
123
+ optional library except the standard library that comes by default with Ruby 2.0.
124
+
125
+ You may need to modify the first line (Shebang line) of the script to suit your
126
+ environment (it should be unnecessary for Linux and macOS), or run it
127
+ explicitly with your Ruby command as
128
+
129
+ Prompt% /YOUR/ENV/ruby /YOUR/INSTALLED/ruby_unicode_prop
130
+
131
+ == Developer's note
132
+
133
+ The master of this README file is found in
134
+ {RubyGems/ruby_unicode_prop}[https://rubygems.org/gems/ruby_unicode_prop]
135
+
136
+ === Tests
137
+
138
+ Ruby codes under the directory <tt>test/</tt> are the test scripts.
139
+ You can run them from the top directory as <tt>ruby test/test_****.rb</tt>
140
+ or simply run <tt>make test</tt>.
141
+
142
+
143
+ == Known bugs and Todo items
144
+
145
+ None.
146
+
147
+
148
+ == Copyright
149
+
150
+ Author:: Masa Sakano < info a_t wisebabel dot com >
151
+ Versions:: The versions of this package follow Semantic Versioning (2.0.0) http://semver.org/
152
+ License:: MIT
153
+
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ require 'rake/testtask'
2
+
3
+ Rake::TestTask.new do |t|
4
+ t.libs << 'test'
5
+ end
6
+
7
+ desc "Run tests"
8
+ task :default => :test
9
+
@@ -0,0 +1,186 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # Script to print a list of either/both characters or hexadecimal codepoints of
5
+ # the specified Unicode properties.
6
+ #
7
+ # @author: M. Sakano (Wise Babel Ltd)
8
+
9
+ require 'optparse'
10
+ require 'net/http'
11
+ require 'uri'
12
+
13
+ URL_RUBY_UNICODE_PROPS = 'https://raw.githubusercontent.com/k-takata/Onigmo/master/doc/UnicodeProps.txt'
14
+ MAX_UNICODE_HEX = 0x2FFFF # Plane 0-2 (up to Supplementary Ideographic Plane) of Unicode
15
+
16
+ BANNER = <<"__EOF__"
17
+ USAGE: #{File.basename($0)} [options] Property1 [Property2, ...]
18
+ Print all the characters and/or their hex-codepoints that have
19
+ the given "Unicode property" used in Ruby Regexp like \\p{Currency_Symbol}
20
+ (or POSIX expression like [[:blank:]] if -p option is given).
21
+ __EOF__
22
+
23
+ # Initialising the hash for the command-line options.
24
+ OPTS = {
25
+ with_char: true,
26
+ with_codepoint: true,
27
+ delimiter: nil,
28
+ posix: false,
29
+ lowercase: false,
30
+ list_property: false,
31
+ # :chatter => 3, # Default
32
+ debug: false,
33
+ }
34
+
35
+ # Parses the command-line options in ARGV into the constant Hash OPTS.
36
+ #
37
+ # ARGV is modified (parsed options are removed). This exits directly, either after
38
+ # printing the property list for --list-property or with status 1 on invalid usage.
39
+ #
40
+ # @return [Hash] OPTS, with the default delimiter resolved.
41
+ def handle_argv
42
+ opt = OptionParser.new(BANNER)
43
+ opt.separator ""
44
+ opt.separator "Options:"
45
+ opt.on('-c', '--[no-]without-codepoint', sprintf("Print characters only? (Def: %s)", (!OPTS[:with_codepoint]).inspect), FalseClass){|v| OPTS[:with_codepoint] = v} # memo: "-c" for "Characters only"
46
+ opt.on('-n', '--[no-]without-char', sprintf("Print codepoints only? (Def: %s)", (!OPTS[:with_char]).inspect), FalseClass){|v| OPTS[:with_char] = v} # memo: "-n" for "codepoints Number only"
47
+ opt.on('-d', '--delimiter=CHAR', "Delimeter in output.") {|v| OPTS[:delimiter] = v} # no default shown: it depends on -c/-n (see Note1)
48
+ opt.on('-l', '--[no-]lowercase', sprintf("Lower cases alphabets are used for Hex in codepoints (Def: %s)", OPTS[:lowercase].inspect)) {|v| OPTS[:lowercase] = v} # memo: "-l" for "Lower case"
49
+ opt.on('-p', '--[no-]posix', sprintf("Use POSIX expression instead of Unicode (Def: %s)", OPTS[:posix].inspect)) {|v| OPTS[:posix] = v} # memo: "-p" for "POSIX"
50
+ opt.on( '--[no-]list-property', 'Print all the Ruby Unicode properties and exit.') {|v| OPTS[:list_property] = v}
51
+ # opt.on( '--version', "Display the version and exits.", TrueClass) {|v| OPTS[:version] = v} # Consider opts.on_tail
52
+ # opt.on( '--[no-]debug', "Debug (Def: false)") {|v| OPTS[:debug] = v}
53
+ opt.separator ""
54
+ opt.separator "Note1: Delimeter means one"
55
+ opt.separator " (1) between multiple characters and codepoints if either of -n or -c is specified (Default: Null for -c (characters only) and a new line for -n."
56
+ opt.separator " (2) between the number and character of each pair if both are specified (Def: a whitespace), whereas the delimeter between pairs is always a newline."
57
+ opt.separator " To specify a newline as a delimiter, give 'NL'"
58
+ opt.separator "Note2: Properties differ for '-p', 'ascii' in POSIX and 'ASCII' in Unicode."
59
+ opt.separator "Reference: https://github.com/k-takata/Onigmo/blob/master/doc/UnicodeProps.txt"
60
+ # opt.separator " Ruby-Source: /enc/unicode/name2ctype.h"
61
+
62
+ opt.parse!(ARGV)
63
+
64
+ # exit if --list-property (a successful listing is not an error, hence status 0)
65
+ (puts get_file_unicode_properties; exit) if OPTS[:list_property]
66
+
67
+ if !OPTS[:with_char] && !OPTS[:with_codepoint]
68
+ warn "Specify (or do not specify at all) what to print (do not specify -n and -c simultaneously)."
69
+ exit 1
70
+ end
71
+
72
+ if ARGV.size == 0
73
+ warn "ERROR: Specify Unicode property(ies). To see help, run #{File.basename($0)} --help"
74
+ exit 1
75
+ end
76
+
77
+ # Adjustments
78
+ OPTS[:delimiter] = "\n" if 'NL' == OPTS[:delimiter] # Special case; 'NL' means a newline.
79
+ OPTS[:delimiter] ||=
80
+ if OPTS[:with_char] && OPTS[:with_codepoint]
81
+ " "
82
+ elsif OPTS[:with_char]
83
+ ""
84
+ else
85
+ $/
86
+ end
87
+
88
+ OPTS
89
+ end
90
+
91
+ # Returns the array of characters that satisfies the Property conditions.
92
+ #
93
+ # @param argv [ARGV]
94
+ # @param opts [Hash] Hash of command-line options.
95
+ # @return [Array]
96
+ def get_ary_chars(argv, opts)
97
+ if opts[:posix]
98
+ # for 'ascii' and 'blank', joined as [[:ascii:][:blank:]]
99
+ fmt1 = '[%s]'
100
+ fmt2 = '[:%s:]'
101
+ fmt_deli = ""
102
+ else
103
+ # for 'ASCII' and 'Digit', joined as (?:\p{ASCII}|\p{Digit})
104
+ fmt1 = '(?:%s)'
105
+ fmt2 = '\\p{%s}'
106
+ fmt_deli = '|'
107
+ end
108
+
109
+ prop_q = fmt1 % argv.map{|c| fmt2 % Regexp.quote(c)}.join(fmt_deli)
110
+
111
+ fmt0 = (opts[:lowercase] ? "%04x" : "%04X")
112
+
113
+ arret = []
114
+ begin
115
+ (0..MAX_UNICODE_HEX).each do |i|
116
+ s = i.chr(Encoding::UTF_8) rescue next # invalid codepoint 0xD800 in UTF-8 (RangeError)
117
+ next if Regexp.new(prop_q) !~ s
118
+ arret <<
119
+ if opts[:with_char] && opts[:with_codepoint]
120
+ (fmt0+"%s%s") % [i, opts[:delimiter], s]
121
+ elsif opts[:with_char]
122
+ s
123
+ else
124
+ fmt0 % i
125
+ end
126
+ end
127
+ rescue RegexpError => er
128
+ warn '(RegexpError) '+er.message
129
+ exit 1
130
+ end
131
+
132
+ arret
133
+ end
134
+
135
+
136
+ # Downloads the list of Unicode properties from URL_RUBY_UNICODE_PROPS and returns it as text.
137
+ #
138
+ # If encountering ERROR (SocketError or a non-200 HTTP response), this directly exits with status 1!
139
+ #
140
+ # @return [String] The response body, preceded by a "Fetched from: URL" line.
141
+ def get_file_unicode_properties
142
+ url = URL_RUBY_UNICODE_PROPS
143
+ #url = "http://google.com/naiyo.txt" # (for debugging; presumably exercises the non-200 branch)
144
+ #url = "http://googlegoo345.com/naiyo.txt" # (for debugging; presumably an unresolvable host => SocketError)
145
+ begin
146
+ resp = Net::HTTP.get_response( URI.parse url )
147
+ rescue SocketError => er
148
+ warn(er.message + "\nERROR: Host not reachable: " + url)
149
+ exit 1
150
+ end
151
+
152
+ return "Fetched from: %s\n%s" % [url, resp.body] if resp.code.to_i == 200
153
+
154
+ warn "ERROR in HTTP response (#{resp.code}) - File not found or something: "+url
155
+ exit 1
156
+ end
157
+
158
+
159
+ ################################################
160
+ # MAIN
161
+ ################################################
162
+
163
+ $stdout.sync=true
164
+ $stderr.sync=true
165
+
166
+ # Handle the command-line options => OPTS
167
+ begin
168
+ handle_argv()
169
+ rescue OptionParser::InvalidOption, OptionParser::AmbiguousOption => er
170
+ warn er
171
+ exit 1
172
+ end
173
+
174
+ arret = get_ary_chars(ARGV, OPTS)
175
+
176
+ if OPTS[:with_char] && OPTS[:with_codepoint]
177
+ puts arret.join($/)
178
+ else
179
+ puts arret.join(OPTS[:delimiter])
180
+ end
181
+
182
+ exit
183
+
184
+ __END__
185
+
186
+
@@ -0,0 +1,50 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'rake'
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = %q{ruby_unicode_prop}.sub(/.*/){|c| (c == File.basename(Dir.pwd)) ? c : raise("ERROR: s.name=(#{c}) in gemspec seems wrong!")}
7
+ s.version = "1.0"
8
+ # s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
9
+ s.bindir = 'bin'
10
+ %w(ruby_unicode_prop).each do |f|
11
+ path = s.bindir+'/'+f
12
+ File.executable?(path) ? s.executables << f : raise("ERROR: Executable (#{path}) is not executable!")
13
+ end
14
+ s.authors = ["Masa Sakano"]
15
+ s.date = %q{2019-10-26}
16
+ s.summary = %q{Command to print the characters and hex-codepoints with the given Unicode properties}
17
+ s.description = %q{This package provides a standalone command ruby_unicode_prop, which prints the Unicode characters and/or their hexadecimal codepoints that satisfy one or more given Unicode properties used in Ruby Regexp (or POSIX bracket expressions).}
18
+ # s.email = %q{abc@example.com}
19
+ s.extra_rdoc_files = [
20
+ #"LICENSE.txt",
21
+ "README.en.rdoc",
22
+ ]
23
+ s.license = 'MIT'
24
+ s.files = FileList['.gitignore','lib/**/*.rb','[A-Z]*','test/**/*.rb', '*.gemspec', 'bin'].to_a.delete_if{ |f|
25
+ ret = false
26
+ arignore = IO.readlines('.gitignore')
27
+ arignore.map{|i| i.chomp}.each do |suffix|
28
+ if File.fnmatch(suffix, File.basename(f))
29
+ ret = true
30
+ break
31
+ end
32
+ end
33
+ ret
34
+ }
35
+ s.files.reject! { |fn| File.symlink? fn }
36
+ # s.add_runtime_dependency 'rails'
37
+ # s.add_development_dependency "bourne", [">= 0"]
38
+ s.homepage = %q{https://www.wisebabel.com}
39
+ s.rdoc_options = ["--charset=UTF-8"]
40
+
41
+ # s.require_paths = ["lib"] # Default "lib"
42
+ s.required_ruby_version = '>= 2.0'
43
+ s.test_files = Dir['test/**/*.rb']
44
+ s.test_files.reject! { |fn| File.symlink? fn }
45
+ # s.requirements << 'libmagick, v6.0' # Simply, info to users.
46
+ # s.rubygems_version = %q{1.3.5} # This is always set automatically!!
47
+
48
+ s.metadata["yard.run"] = "yri" # use "yard" to build full HTML docs.
49
+ end
50
+
@@ -0,0 +1,140 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ # @author: M. Sakano (Wise Babel Ltd)
4
+
5
+ require 'plain_text'
6
+ require 'open3'
7
+
8
+ $stdout.sync=true
9
+ $stderr.sync=true
10
+ # print '$LOAD_PATH=';p $LOAD_PATH
11
+
12
+ #################################################
13
+ # Unit Test
14
+ #################################################
15
+
16
+ gem "minitest"
17
+ # require 'minitest/unit'
18
+ require 'minitest/autorun'
19
+
20
+ class TestUnitRubyUnicodeProp < MiniTest::Test
21
+ T = true
22
+ F = false
23
+ SCFNAME = File.basename(__FILE__)
24
+ EXE = "%s/../bin/%s" % [File.dirname(__FILE__), File.basename(__FILE__).sub(/^test_(.+)\.rb/, '\1')]
25
+
26
+ def setup
27
+ end
28
+
29
+ def teardown
30
+ end
31
+
32
+ def test_ruby_unicode_prop01
33
+ o, e, s = Open3.capture3 EXE+" ASCII"
34
+ assert_equal 0, s.exitstatus
35
+ assert_match(/\A0000 .\n0001 .\n.*\n004A J/m, o)
36
+ assert_operator 70, '<', o.count($/)
37
+ assert_empty e
38
+ size_ascii = o.size
39
+
40
+ o, e, s = Open3.capture3 EXE+" -l ASCII"
41
+ assert_equal 0, s.exitstatus
42
+ assert_match(/\A0000 .\n0001 .\n.*\n004a J/m, o)
43
+
44
+ o, e, s = Open3.capture3 EXE+" ascii"
45
+ assert_equal 0, s.exitstatus
46
+ assert_match(/\A0000 .\n0001 .\n.*\n004A J/m, o)
47
+ assert_operator 70, '<', o.count($/)
48
+ assert_empty e
49
+
50
+ o, e, s = Open3.capture3 EXE+" -p ASCII" # => Error (b/c "ascii" in the POSIX form)
51
+ assert_equal 1, s.exitstatus
52
+ assert_match(/invalid POSIX/i, e)
53
+
54
+ o, e, s = Open3.capture3 EXE+" -d H ASCII"
55
+ assert_equal 0, s.exitstatus
56
+ assert_match(/\A0000H.\n0001H.\n.*\n004AHJ/m, o)
57
+ o, e, s = Open3.capture3 EXE+" --delimiter=H ASCII"
58
+ assert_match(/\A0000H.\n0001H.\n.*\n004AHJ/m, o)
59
+
60
+ o, e, s = Open3.capture3 EXE+" --without-codepoint ASCII" # characters only (-c)
61
+ assert_equal 0, s.exitstatus
62
+ assert_equal 0, o[40..-1].chomp.count($/) # "\n" is included in ASCII itself.
63
+ assert_equal 1, o.count(" ")
64
+ assert_match(/XYZ/m, o)
65
+ assert_empty e
66
+
67
+ o, e, s = Open3.capture3 EXE+" -c -d H ASCII" # characters only (-c)
68
+ assert_equal 0, o[40..-1].chomp.count($/) # "\n" is included in ASCII itself.
69
+ assert_equal 1, o.count(" ")
70
+ assert_match(/XHYHZ/m, o)
71
+ assert_empty e
72
+
73
+ o, e, s = Open3.capture3 EXE+" -c -d NL ASCII" # characters only (-c)
74
+ assert_operator 70, '<', o.count($/), "Special case of 'NL' is not handled correctly."
75
+ assert_equal 1, o.count(" ")
76
+ assert_match(/X\nY\nZ/m, o)
77
+ assert_empty e
78
+
79
+ o, e, s = Open3.capture3 EXE+" --without-char ASCII" # codepoints only (-n)
80
+ assert_equal 0, o.count(" ")
81
+ assert_equal 0, o.count("X")
82
+ assert_operator 70, '<', o.count($/)
83
+ assert_equal "0000\n0001\n", o[0..9]
84
+ assert_empty e
85
+
86
+ o, e, s = Open3.capture3 EXE+" -n -d H ASCII" # codepoints only (-n)
87
+ assert_equal 0, o[60..-1].chomp.count($/) # "\n" is included in ASCII itself.
88
+ assert_operator 70, '<', o.count(?H)
89
+ assert_equal "0000H0001H", o[0..9]
90
+ assert_match(/\A0000H0001H.*H004AH/m, o)
91
+ assert_empty e
92
+
93
+ o, e, s = Open3.capture3 EXE+" -l -n -d H ASCII" # codepoints only (-n)
94
+ assert_match(/\A0000H0001H.*H004aH/m, o)
95
+
96
+ # Multiple arguments
97
+ o, e, s = Open3.capture3 EXE+" Currency_Symbol ASCII"
98
+ assert_equal 0, s.exitstatus
99
+ assert_match(/0023 \#\n0024 \$\n.*00A3 £/m, o)
100
+ assert_operator size_ascii, '<', o.size # Increased size (because Currency Symbols are added!)
101
+
102
+ o, e, s = Open3.capture3 EXE+" ASCII Digit"
103
+ ou_ad = o
104
+ si_ad = o.size
105
+ o, e, s = Open3.capture3 EXE+" Digit ASCII"
106
+ ou_da = o
107
+ o, e, s = Open3.capture3 EXE+" Digit"
108
+ size_digit = o.size
109
+
110
+ assert_equal ou_ad, ou_da, 'Should be unordered for multiple arguments.'
111
+ assert_operator si_ad, '<', size_ascii + size_digit, 'Duplication should not appear.'
112
+ end
113
+
114
+
115
+ ## tests of Errors ##
116
+ def test_ruby_unicode_prop_error02
117
+ o, e, s = Open3.capture3 EXE+" ASCII -d" # => Error (-d without a parameter)
118
+ assert_equal 1, s.exitstatus
119
+ assert_empty o
120
+
121
+ o, e, s = Open3.capture3 EXE+" -n -c ASCII" # => Error
122
+ assert_equal 1, s.exitstatus
123
+ assert_empty o
124
+ assert_match(/specify/m, e)
125
+
126
+ o, e, s = Open3.capture3 EXE+" -Z ASCII" # => Error
127
+ assert_equal 1, s.exitstatus
128
+ assert_match(/(invalid|ambiguous) option/i, e)
129
+
130
+ o, e, s = Open3.capture3 EXE+" naiyo" # => Error
131
+ assert_equal 1, s.exitstatus
132
+ assert_empty o
133
+ assert_match(/RegexpError/m, e)
134
+
135
+ o, e, s = Open3.capture3 EXE # => Error (No arguments specified)
136
+ assert_equal 1, s.exitstatus
137
+ assert_empty o
138
+ end
139
+ end # class TestUnitRubyUnicodeProp < MiniTest::Test
140
+
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby_unicode_prop
3
+ version: !ruby/object:Gem::Version
4
+ version: '1.0'
5
+ platform: ruby
6
+ authors:
7
+ - Masa Sakano
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-10-26 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: This module provides utility functions and methods to handle plain text,
14
+ classes Part/Paragraph/Boundary to represent the logical structure of a document
15
+ and ParseRule to describe the rules to parse plain text to produce a Part-type Ruby
16
+ instance.
17
+ email:
18
+ executables:
19
+ - ruby_unicode_prop
20
+ extensions: []
21
+ extra_rdoc_files:
22
+ - README.en.rdoc
23
+ files:
24
+ - ".gitignore"
25
+ - ChangeLog
26
+ - LICENSE.txt
27
+ - Makefile
28
+ - README.en.rdoc
29
+ - Rakefile
30
+ - bin/ruby_unicode_prop
31
+ - ruby_unicode_prop.gemspec
32
+ - test/test_ruby_unicode_prop.rb
33
+ homepage: https://www.wisebabel.com
34
+ licenses:
35
+ - MIT
36
+ metadata:
37
+ yard.run: yri
38
+ post_install_message:
39
+ rdoc_options:
40
+ - "--charset=UTF-8"
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '2.0'
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ requirements: []
54
+ rubygems_version: 3.0.3
55
+ signing_key:
56
+ specification_version: 4
57
+ summary: Command to print the characters and hex-codepoints with the given Unicode
58
+ properties
59
+ test_files:
60
+ - test/test_ruby_unicode_prop.rb