ovec 0.0.5 → 0.0.6

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0283a4198221eac5d155b9543b4688255653b0cd
4
- data.tar.gz: 5da47f5452811131bef97e974c7808e0767a7285
3
+ metadata.gz: 072ab08b381a445811c64db29452dd28f5b1c575
4
+ data.tar.gz: 7fc5d8aa41477ffcf71499df5d44cc88f35a52dd
5
5
  SHA512:
6
- metadata.gz: 7f70d8f5ae05124482fe1ade3270df03d3c00318fcdb63a155dd203a9f4999a009def0cf705a57f4f2fc3e422f185294ba5521aa494dfb474c97311afefa66db
7
- data.tar.gz: d963c7034b41eb9e04f45ebf3b6200c1b975b2c53042cbf9e9c32aa13338715a7eefedbc1833b2f0eb502a80f8fc1a49efbd76fb9c84e49613f17dd9c4cb8e00
6
+ metadata.gz: 22dbbd36ec18a956479d356ba949405b3e211631b3ec89f1f30d0e9435a69a7d1d4590b525b1d909dc57d88974cc80d38f6f2a6ffdf2276f5e25c403667e5aa9
7
+ data.tar.gz: 2e193c2a77a1daefeff432b8ab8da85f8f70807e76e1194268f0170fa72a69c9a844352466df5b3e760612fdcf1d60985a7328c2e12d5f533fb48f00c3704ffc
data/TODO CHANGED
@@ -10,3 +10,4 @@
10
10
 
11
11
  - ne-UTF8 soubory
12
12
  - mista parsovacich chyb
13
+ - vlnkovani na rozhrani radku
data/bin/ovec CHANGED
@@ -6,13 +6,9 @@ require 'optparse'
6
6
  options = {}
7
7
 
8
8
  opts_parser = OptionParser.new do |opts|
9
- opts.banner = "Usage: ovec [options]"
9
+ opts.banner = "Usage: ovec [options] [input file(s)]"
10
10
 
11
- opts.on("-i", "--input FILENAME", "Input filename or - for stdin (required)") do |filename|
12
- options[:input] = filename
13
- end
14
-
15
- opts.on("-o", "--output FILENAME", "Output filename or - for stdout (required)") do |filename|
11
+ opts.on("-o", "--output FILENAME", "Output filename (STDOUT is used if omitted)") do |filename|
16
12
  options[:output] = filename
17
13
  end
18
14
 
@@ -31,19 +27,11 @@ opts_parser = OptionParser.new do |opts|
31
27
  end
32
28
  end
33
29
 
34
- input_file = nil
35
30
  output_file = nil
36
31
 
37
32
  begin
38
33
  opts_parser.parse!
39
- unless options.key?(:input) && options.key?(:output)
40
- $stderr.puts "You must specify both an input file and an output file."
41
- $stderr.puts opts_parser.help
42
- exit 1
43
- end
44
-
45
- input_file = (options[:input] == '-') ? STDIN : File.open(options[:input], "r")
46
- output_file = (options[:output] == '-') ? STDOUT : File.open(options[:output], "w")
34
+ output_file = options[:output] ? File.open(options[:output], "w") : STDOUT
47
35
  rescue OptionParser::ParseError => ex
48
36
  $stderr.puts "Invalid options: #{ex.message}"
49
37
  $stderr.puts opts_parser.help
@@ -53,7 +41,8 @@ rescue StandardError => ex
53
41
  exit 1
54
42
  end
55
43
 
56
- content = input_file.read
44
+ # OptionParser#parse! changes ARGV, so ARGF works here.
45
+ content = ARGF.read
57
46
 
58
47
  parser = Ovec::Parser.new(debug: options[:debug])
59
48
  tree = parser.parse(content)
@@ -66,6 +55,4 @@ tier = Ovec::Tier.new
66
55
  tm.run_text_manipulator(tier)
67
56
 
68
57
  output_file.puts tree.to_tex
69
-
70
- input_file.close
71
58
  output_file.close
data/lib/ovec/tier.rb CHANGED
@@ -4,20 +4,20 @@ module Ovec
4
4
  class Tier < TextManipulator
5
5
  # The last character this regex matches is changed to a tilde.
6
6
  REGEX = /(
7
- ((\p{Z}|\~|\n)[KkSsVvZzOoUu]\p{Z})| # KSVZOU jako samostatne slovo
8
- ([\.\?\!](\p{Z}|\~)+[KSVZOUAI]\p{Z})| # KSVZOUAI na zacatku vety
9
- (\A[KSVZOUAI]\p{Z})| # KSVZOUAI na zacatku textu
7
+ ((\p{Z}|[~\n()\[\]\{\}])[KkSsVvZzOoUu](\p{Z}|\n))| # KSVZOU jako samostatne slovo
8
+ ([\.\?\!](\p{Z}|\~)+[KSVZOUAI](\p{Z}|\n))| # KSVZOUAI na zacatku vety
9
+ (\A[KSVZOUAI](\p{Z}|\n))| # KSVZOUAI na zacatku textu
10
10
  (\p{Z}(?=--(\p{Z}|\n)))| # mezera, za kterou je pomlcka
11
- (,(\p{Z}|\~|\n)+a\p{Z}) # ... modulo 10, a~timto prvkem ...; TODO: plati tohle i pro "i"?
11
+ (,(\p{Z}|\~|\n)+a(\p{Z}|\n)) # ... modulo 10, a~timto prvkem ...; TODO: plati tohle i pro "i"?
12
12
  )/x
13
13
 
14
14
  # TODO: generally tie "5.~batalion", ...
15
15
  # All changes within this regex are changed to a tilde.
16
16
  DATE_REGEX = /(
17
- (?<=\p{Z})\p{Nd}{1,2}\.\p{Z}
17
+ (?<=\p{Z}|\A)\p{Nd}{1,2}\.\p{Z}
18
18
  (\p{Nd}{1,2}\.|leden|únor|březen|duben|květen|červen|červenec|srpen|září|říjen|listopad|prosinec| # TODO: plne sklonovani? nebo nejaky wildcard?
19
19
  ledna|února|března|dubna|května|června|července|srpna|září|října|listopadu|prosince)\p{Z}
20
- \p{Nd}{4}(?=\p{Z}) # Datum jako "1. 5. 2013"
20
+ \p{Nd}{4}(?=(\p{Z}|[.,?!]|\Z)) # Datum jako "1. 5. 2013"
21
21
  )/x
22
22
 
23
23
  def run
@@ -33,7 +33,24 @@ module Ovec
33
33
  match = matches[i]
34
34
  change = match.end(0) - 1
35
35
  chunk, offset = _find_chunk_and_offset(change)
36
+
37
+ former_character = chunk[offset]
38
+
36
39
  chunk[offset] = '~'
40
+
41
+ if former_character == '\n'
42
+ # If we changed a newline to a tilde, change previous space to a
43
+ # newline -- move the tied word to the other line.
44
+ j = change - 1
45
+ while j >= 0
46
+ if @joined[j] == ' '
47
+ chunk, offset = _find_chunk_and_offset(j)
48
+ chunk[offset] = '\n'
49
+ break
50
+ end
51
+ j -= 1
52
+ end
53
+ end
37
54
  end
38
55
 
39
56
  _rejoin
@@ -42,11 +59,10 @@ module Ovec
42
59
 
43
60
  # Dates can't overlap. 1 scan is enough.
44
61
  matches = @joined.to_enum(:scan, DATE_REGEX).map { Regexp.last_match }
45
- for i in 0...matches.length
46
- match = matches[i]
47
- for j in match.begin...match.end
48
- if @joined[j] == ' '
49
- chunk, offset = _find_chunk_and_offset(j)
62
+ for match in matches
63
+ for i in (match.begin(0))...(match.end(0))
64
+ if @joined[i] == ' '
65
+ chunk, offset = _find_chunk_and_offset(i)
50
66
  chunk[offset] = '~'
51
67
  end
52
68
  end
data/lib/ovec/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Ovec
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  NAME_WITH_VERSION = "Ovec #{VERSION}"
4
4
  end
@@ -6,32 +6,43 @@ module Ovec
6
6
  @tier = Tier.new
7
7
  end
8
8
 
9
+ private
10
+ def assert_ties_to(input, output)
11
+ parser = Ovec::Parser.new(debug: true)
12
+ tree = parser.parse(input.dup)
13
+
14
+ tm = Ovec::TexManipulator.new
15
+ tm.bind(tree)
16
+
17
+ tm.run_text_manipulator(@tier)
18
+
19
+ text = tree.to_tex
20
+
21
+ assert_equal output, text
22
+ end
23
+
24
+ public
9
25
  def test_basic_without_ties
10
26
  text = "Ahoj. Jak se máš?"
11
27
  text_duplicate = text.dup
12
- @tier.bind([text_duplicate])
13
- @tier.run
14
- assert_equal text, text_duplicate
15
- end
16
28
 
17
- private
18
- def assert_ties_to(input, output)
19
- input = [input] if input.is_a? String
20
- output = [output] if output.is_a? String
21
- text = input.dup
22
- @tier.bind(test)
23
- @tier.run
24
- assert_equal text, outpu
29
+ parser = Ovec::Parser.new(debug: true)
30
+ tree = parser.parse(text)
31
+
32
+ tm = Ovec::TexManipulator.new
33
+ tm.bind(tree)
34
+
35
+ tm.run_text_manipulator(@tier)
36
+
37
+ text = tree.to_tex
38
+
39
+ assert_equal text, text_duplicate
25
40
  end
26
41
 
27
42
  def test_simple_tie
28
43
  assert_ties_to "K blabla u blabla s blabla.", "K~blabla u~blabla s~blabla."
29
44
  end
30
45
 
31
- def test_array_tie
32
- assert_ties_to [ "K blabla u", " blabla ", "s blabla.", " A blabla?" ], [ "K~blabla u", "~blabla ", "s~blabla.", " A~blabla?" ]
33
- end
34
-
35
46
  def test_regex_works
36
47
  regex = Tier::REGEX
37
48
  assert !("ahoj" =~ regex)
@@ -50,7 +61,7 @@ module Ovec
50
61
  end
51
62
 
52
63
  def test_tie_across_newline
53
- assert_ties_to "Pojednani pojednavajici\no pojednavani.", "Pojednavani pojednavajici\no~pojednavani."
64
+ assert_ties_to "Pojednavani pojednavajici\no pojednavani.", "Pojednavani pojednavajici\no~pojednavani."
54
65
  end
55
66
 
56
67
  def test_tie_a_after_pause
@@ -65,5 +76,16 @@ module Ovec
65
76
  def test_tie_various
66
77
  assert_ties_to "Je-li x sudé, je dělitelné dvěma (v opačném případě není).", "Je-li x sudé, je dělitelné dvěma (v~opačném případě není)."
67
78
  end
79
+
80
+ def test_tie_in_newline
81
+ assert_ties_to "V\nrámci\ntohohle", "V~rámci\ntohohle"
82
+ assert_ties_to "V\nrámci tohohle", "V~rámci tohohle"
83
+ end
84
+
85
+ def test_date_regex_ok
86
+ assert "10" =~ /\A\p{Nd}*\Z/
87
+ assert "1. 3. 2013" =~ Tier::DATE_REGEX
88
+ assert "Bylo zrovna 1. 3. 2013." =~ Tier::DATE_REGEX
89
+ end
68
90
  end
69
91
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ovec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michal Pokorný