ovec 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0283a4198221eac5d155b9543b4688255653b0cd
4
- data.tar.gz: 5da47f5452811131bef97e974c7808e0767a7285
3
+ metadata.gz: 072ab08b381a445811c64db29452dd28f5b1c575
4
+ data.tar.gz: 7fc5d8aa41477ffcf71499df5d44cc88f35a52dd
5
5
  SHA512:
6
- metadata.gz: 7f70d8f5ae05124482fe1ade3270df03d3c00318fcdb63a155dd203a9f4999a009def0cf705a57f4f2fc3e422f185294ba5521aa494dfb474c97311afefa66db
7
- data.tar.gz: d963c7034b41eb9e04f45ebf3b6200c1b975b2c53042cbf9e9c32aa13338715a7eefedbc1833b2f0eb502a80f8fc1a49efbd76fb9c84e49613f17dd9c4cb8e00
6
+ metadata.gz: 22dbbd36ec18a956479d356ba949405b3e211631b3ec89f1f30d0e9435a69a7d1d4590b525b1d909dc57d88974cc80d38f6f2a6ffdf2276f5e25c403667e5aa9
7
+ data.tar.gz: 2e193c2a77a1daefeff432b8ab8da85f8f70807e76e1194268f0170fa72a69c9a844352466df5b3e760612fdcf1d60985a7328c2e12d5f533fb48f00c3704ffc
data/TODO CHANGED
@@ -10,3 +10,4 @@
10
10
 
11
11
  - ne-UTF8 soubory
12
12
  - mista parsovacich chyb
13
+ - vlnkovani na rozhrani radku
data/bin/ovec CHANGED
@@ -6,13 +6,9 @@ require 'optparse'
6
6
  options = {}
7
7
 
8
8
  opts_parser = OptionParser.new do |opts|
9
- opts.banner = "Usage: ovec [options]"
9
+ opts.banner = "Usage: ovec [options] [input file(s)]"
10
10
 
11
- opts.on("-i", "--input FILENAME", "Input filename or - for stdin (required)") do |filename|
12
- options[:input] = filename
13
- end
14
-
15
- opts.on("-o", "--output FILENAME", "Output filename or - for stdout (required)") do |filename|
11
+ opts.on("-o", "--output FILENAME", "Output filename (STDOUT is used if omitted)") do |filename|
16
12
  options[:output] = filename
17
13
  end
18
14
 
@@ -31,19 +27,11 @@ opts_parser = OptionParser.new do |opts|
31
27
  end
32
28
  end
33
29
 
34
- input_file = nil
35
30
  output_file = nil
36
31
 
37
32
  begin
38
33
  opts_parser.parse!
39
- unless options.key?(:input) && options.key?(:output)
40
- $stderr.puts "You must specify both an input file and an output file."
41
- $stderr.puts opts_parser.help
42
- exit 1
43
- end
44
-
45
- input_file = (options[:input] == '-') ? STDIN : File.open(options[:input], "r")
46
- output_file = (options[:output] == '-') ? STDOUT : File.open(options[:output], "w")
34
+ output_file = options[:output] ? File.open(options[:output], "w") : STDOUT
47
35
  rescue OptionParser::ParseError => ex
48
36
  $stderr.puts "Invalid options: #{ex.message}"
49
37
  $stderr.puts opts_parser.help
@@ -53,7 +41,8 @@ rescue StandardError => ex
53
41
  exit 1
54
42
  end
55
43
 
56
- content = input_file.read
44
+ # OptionParser#parse! changes ARGV, so ARGF works here.
45
+ content = ARGF.read
57
46
 
58
47
  parser = Ovec::Parser.new(debug: options[:debug])
59
48
  tree = parser.parse(content)
@@ -66,6 +55,4 @@ tier = Ovec::Tier.new
66
55
  tm.run_text_manipulator(tier)
67
56
 
68
57
  output_file.puts tree.to_tex
69
-
70
- input_file.close
71
58
  output_file.close
data/lib/ovec/tier.rb CHANGED
@@ -4,20 +4,20 @@ module Ovec
4
4
  class Tier < TextManipulator
5
5
  # The last character this regex matches is changed to a tilde.
6
6
  REGEX = /(
7
- ((\p{Z}|\~|\n)[KkSsVvZzOoUu]\p{Z})| # KSVZOU jako samostatne slovo
8
- ([\.\?\!](\p{Z}|\~)+[KSVZOUAI]\p{Z})| # KSVZOUAI na zacatku vety
9
- (\A[KSVZOUAI]\p{Z})| # KSVZOUAI na zacatku textu
7
+ ((\p{Z}|[~\n()\[\]\{\}])[KkSsVvZzOoUu](\p{Z}|\n))| # KSVZOU jako samostatne slovo
8
+ ([\.\?\!](\p{Z}|\~)+[KSVZOUAI](\p{Z}|\n))| # KSVZOUAI na zacatku vety
9
+ (\A[KSVZOUAI](\p{Z}|\n))| # KSVZOUAI na zacatku textu
10
10
  (\p{Z}(?=--(\p{Z}|\n)))| # mezera, za kterou je pomlcka
11
- (,(\p{Z}|\~|\n)+a\p{Z}) # ... modulo 10, a~timto prvkem ...; TODO: plati tohle i pro "i"?
11
+ (,(\p{Z}|\~|\n)+a(\p{Z}|\n)) # ... modulo 10, a~timto prvkem ...; TODO: plati tohle i pro "i"?
12
12
  )/x
13
13
 
14
14
  # TODO: generally tie "5.~batalion", ...
15
15
  # All spaces within a match of this regex are changed to a tilde.
16
16
  DATE_REGEX = /(
17
- (?<=\p{Z})\p{Nd}{1,2}\.\p{Z}
17
+ (?<=\p{Z}|\A)\p{Nd}{1,2}\.\p{Z}
18
18
  (\p{Nd}{1,2}\.|leden|únor|březen|duben|květen|červen|červenec|srpen|září|říjen|listopad|prosinec| # TODO: plne sklonovani? nebo nejaky wildcard?
19
19
  ledna|února|března|dubna|května|června|července|srpna|září|října|listopadu|prosince)\p{Z}
20
- \p{Nd}{4}(?=\p{Z}) # Datum jako "1. 5. 2013"
20
+ \p{Nd}{4}(?=(\p{Z}|[.,?!]|\Z)) # Datum jako "1. 5. 2013"
21
21
  )/x
22
22
 
23
23
  def run
@@ -33,7 +33,24 @@ module Ovec
33
33
  match = matches[i]
34
34
  change = match.end(0) - 1
35
35
  chunk, offset = _find_chunk_and_offset(change)
36
+
37
+ former_character = chunk[offset]
38
+
36
39
  chunk[offset] = '~'
40
+
41
+ if former_character == '\n'
42
+ # If we changed a newline to a tilde, change previous space to a
43
+ # newline -- move the tied word to the other line.
44
+ j = change - 1
45
+ while j >= 0
46
+ if @joined[j] == ' '
47
+ chunk, offset = _find_chunk_and_offset(j)
48
+ chunk[offset] = '\n'
49
+ break
50
+ end
51
+ j -= 1
52
+ end
53
+ end
37
54
  end
38
55
 
39
56
  _rejoin
@@ -42,11 +59,10 @@ module Ovec
42
59
 
43
60
  # Dates can't overlap. 1 scan is enough.
44
61
  matches = @joined.to_enum(:scan, DATE_REGEX).map { Regexp.last_match }
45
- for i in 0...matches.length
46
- match = matches[i]
47
- for j in match.begin...match.end
48
- if @joined[j] == ' '
49
- chunk, offset = _find_chunk_and_offset(j)
62
+ for match in matches
63
+ for i in (match.begin(0))...(match.end(0))
64
+ if @joined[i] == ' '
65
+ chunk, offset = _find_chunk_and_offset(i)
50
66
  chunk[offset] = '~'
51
67
  end
52
68
  end
data/lib/ovec/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Ovec
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  NAME_WITH_VERSION = "Ovec #{VERSION}"
4
4
  end
@@ -6,32 +6,43 @@ module Ovec
6
6
  @tier = Tier.new
7
7
  end
8
8
 
9
+ private
10
+ def assert_ties_to(input, output)
11
+ parser = Ovec::Parser.new(debug: true)
12
+ tree = parser.parse(input.dup)
13
+
14
+ tm = Ovec::TexManipulator.new
15
+ tm.bind(tree)
16
+
17
+ tm.run_text_manipulator(@tier)
18
+
19
+ text = tree.to_tex
20
+
21
+ assert_equal output, text
22
+ end
23
+
24
+ public
9
25
  def test_basic_without_ties
10
26
  text = "Ahoj. Jak se máš?"
11
27
  text_duplicate = text.dup
12
- @tier.bind([text_duplicate])
13
- @tier.run
14
- assert_equal text, text_duplicate
15
- end
16
28
 
17
- private
18
- def assert_ties_to(input, output)
19
- input = [input] if input.is_a? String
20
- output = [output] if output.is_a? String
21
- text = input.dup
22
- @tier.bind(test)
23
- @tier.run
24
- assert_equal text, outpu
29
+ parser = Ovec::Parser.new(debug: true)
30
+ tree = parser.parse(text)
31
+
32
+ tm = Ovec::TexManipulator.new
33
+ tm.bind(tree)
34
+
35
+ tm.run_text_manipulator(@tier)
36
+
37
+ text = tree.to_tex
38
+
39
+ assert_equal text, text_duplicate
25
40
  end
26
41
 
27
42
  def test_simple_tie
28
43
  assert_ties_to "K blabla u blabla s blabla.", "K~blabla u~blabla s~blabla."
29
44
  end
30
45
 
31
- def test_array_tie
32
- assert_ties_to [ "K blabla u", " blabla ", "s blabla.", " A blabla?" ], [ "K~blabla u", "~blabla ", "s~blabla.", " A~blabla?" ]
33
- end
34
-
35
46
  def test_regex_works
36
47
  regex = Tier::REGEX
37
48
  assert !("ahoj" =~ regex)
@@ -50,7 +61,7 @@ module Ovec
50
61
  end
51
62
 
52
63
  def test_tie_across_newline
53
- assert_ties_to "Pojednani pojednavajici\no pojednavani.", "Pojednavani pojednavajici\no~pojednavani."
64
+ assert_ties_to "Pojednavani pojednavajici\no pojednavani.", "Pojednavani pojednavajici\no~pojednavani."
54
65
  end
55
66
 
56
67
  def test_tie_a_after_pause
@@ -65,5 +76,16 @@ module Ovec
65
76
  def test_tie_various
66
77
  assert_ties_to "Je-li x sudé, je dělitelné dvěma (v opačném případě není).", "Je-li x sudé, je dělitelné dvěma (v~opačném případě není)."
67
78
  end
79
+
80
+ def test_tie_in_newline
81
+ assert_ties_to "V\nrámci\ntohohle", "V~rámci\ntohohle"
82
+ assert_ties_to "V\nrámci tohohle", "V~rámci tohohle"
83
+ end
84
+
85
+ def test_date_regex_ok
86
+ assert "10" =~ /\A\p{Nd}*\Z/
87
+ assert "1. 3. 2013" =~ Tier::DATE_REGEX
88
+ assert "Bylo zrovna 1. 3. 2013." =~ Tier::DATE_REGEX
89
+ end
68
90
  end
69
91
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ovec
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michal Pokorný