RubyGems - ovec - Versions diffs - 0.0.5 → 0.0.6 - Mend

ovec 0.0.5 → 0.0.6

Files changed (7) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 0283a4198221eac5d155b9543b4688255653b0cd
-  data.tar.gz: 5da47f5452811131bef97e974c7808e0767a7285
+  metadata.gz: 072ab08b381a445811c64db29452dd28f5b1c575
+  data.tar.gz: 7fc5d8aa41477ffcf71499df5d44cc88f35a52dd
 SHA512:
-  metadata.gz: 7f70d8f5ae05124482fe1ade3270df03d3c00318fcdb63a155dd203a9f4999a009def0cf705a57f4f2fc3e422f185294ba5521aa494dfb474c97311afefa66db
-  data.tar.gz: d963c7034b41eb9e04f45ebf3b6200c1b975b2c53042cbf9e9c32aa13338715a7eefedbc1833b2f0eb502a80f8fc1a49efbd76fb9c84e49613f17dd9c4cb8e00
+  metadata.gz: 22dbbd36ec18a956479d356ba949405b3e211631b3ec89f1f30d0e9435a69a7d1d4590b525b1d909dc57d88974cc80d38f6f2a6ffdf2276f5e25c403667e5aa9
+  data.tar.gz: 2e193c2a77a1daefeff432b8ab8da85f8f70807e76e1194268f0170fa72a69c9a844352466df5b3e760612fdcf1d60985a7328c2e12d5f533fb48f00c3704ffc

data/TODO CHANGED Viewed

@@ -10,3 +10,4 @@
 - ne-UTF8 soubory
 - mista parsovacich chyb
+- vlnkovani na rozhrani radku

data/bin/ovec CHANGED Viewed

@@ -6,13 +6,9 @@ require 'optparse'
 options = {}
 opts_parser = OptionParser.new do |opts|
-	opts.banner = "Usage: ovec [options]"
+	opts.banner = "Usage: ovec [options] [input file(s)]"
-	opts.on("-i", "--input FILENAME", "Input filename or - for stdin (required)") do |filename|
-		options[:input] = filename
-	end
-	opts.on("-o", "--output FILENAME", "Output filename or - for stdout (required)") do |filename|
+	opts.on("-o", "--output FILENAME", "Output filename (STDOUT is used if omitted)") do |filename|
 		options[:output] = filename
 	end
@@ -31,19 +27,11 @@ opts_parser = OptionParser.new do |opts|
 	end
 end
-input_file = nil
 output_file = nil
 begin
 	opts_parser.parse!
-	unless options.key?(:input) && options.key?(:output)
-		$stderr.puts "You must specify both an input file and an output file."
-		$stderr.puts opts_parser.help
-		exit 1
-	end
-	input_file = (options[:input] == '-') ? STDIN : File.open(options[:input], "r")
-	output_file = (options[:output] == '-') ? STDOUT : File.open(options[:output], "w")
+	output_file = options[:output] ? File.open(options[:output], "w") : STDOUT
 rescue OptionParser::ParseError => ex
 	$stderr.puts "Invalid options: #{ex.message}"
 	$stderr.puts opts_parser.help
@@ -53,7 +41,8 @@ rescue StandardError => ex
 	exit 1
 end
-content = input_file.read
+# OptionParser#parse! removes the parsed options from ARGV, so ARGF reads the remaining input file(s), or STDIN if none are given.
+content = ARGF.read
 parser = Ovec::Parser.new(debug: options[:debug])
 tree = parser.parse(content)
@@ -66,6 +55,4 @@ tier = Ovec::Tier.new
 tm.run_text_manipulator(tier)
 output_file.puts tree.to_tex
-input_file.close
 output_file.close

data/lib/ovec/tier.rb CHANGED Viewed

@@ -4,20 +4,20 @@ module Ovec
 	class Tier < TextManipulator
 		# The last character this regex matches is changed to a tilde.
 		REGEX = /(
-			((\p{Z}|\~|\n)[KkSsVvZzOoUu]\p{Z})|   # KSVZOU jako samostatne slovo
-			([\.\?\!](\p{Z}|\~)+[KSVZOUAI]\p{Z})| # KSVZOUAI na zacatku vety
-			(\A[KSVZOUAI]\p{Z})|                  # KSVZOUAI na zacatku textu
+			((\p{Z}|[~\n()\[\]\{\}])[KkSsVvZzOoUu](\p{Z}|\n))|   # KSVZOU jako samostatne slovo
+			([\.\?\!](\p{Z}|\~)+[KSVZOUAI](\p{Z}|\n))| # KSVZOUAI na zacatku vety
+			(\A[KSVZOUAI](\p{Z}|\n))|                  # KSVZOUAI na zacatku textu
 			(\p{Z}(?=--(\p{Z}|\n)))|              # mezera, za kterou je pomlcka
-			(,(\p{Z}|\~|\n)+a\p{Z})               # ... modulo 10, a~timto prvkem ...; TODO: plati tohle i pro "i"?
+			(,(\p{Z}|\~|\n)+a(\p{Z}|\n))               # ... modulo 10, a~timto prvkem ...; TODO: plati tohle i pro "i"?
 		)/x
 		# TODO: generally tie "5.~batalion", ...
 		# All spaces within a match of this regex are changed to tildes.
 		DATE_REGEX = /(
-			(?<=\p{Z})\p{Nd}{1,2}\.\p{Z}
+			(?<=\p{Z}|\A)\p{Nd}{1,2}\.\p{Z}
 			(\p{Nd}{1,2}\.|leden|únor|březen|duben|květen|červen|červenec|srpen|září|říjen|listopad|prosinec| # TODO: plne sklonovani? nebo nejaky wildcard?
 				ledna|února|března|dubna|května|června|července|srpna|září|října|listopadu|prosince)\p{Z}
-			\p{Nd}{4}(?=\p{Z}) # Datum jako "1. 5. 2013"
+			\p{Nd}{4}(?=(\p{Z}|[.,?!]|\Z)) # Datum jako "1. 5. 2013"
 		)/x
 		def run
@@ -33,7 +33,24 @@ module Ovec
 					match = matches[i]
 					change = match.end(0) - 1
 					chunk, offset = _find_chunk_and_offset(change)
+					former_character = chunk[offset]
 					chunk[offset] = '~'
+					if former_character == '\n'
+						# If we changed a newline to a tilde, change previous space to a
+						# newline -- move the tied word to the other line.
+						j = change - 1
+						while j >= 0
+							if @joined[j] == ' '
+								chunk, offset = _find_chunk_and_offset(j)
+								chunk[offset] = '\n'
+								break
+							end
+							j -= 1
+						end
+					end
 				end
 				_rejoin
@@ -42,11 +59,10 @@ module Ovec
 			# Dates can't overlap. 1 scan is enough.
 			matches = @joined.to_enum(:scan, DATE_REGEX).map { Regexp.last_match }
-			for i in 0...matches.length
-				match = matches[i]
-				for j in match.begin...match.end
-					if @joined[j] == ' '
-						chunk, offset = _find_chunk_and_offset(j)
+			for match in matches
+				for i in (match.begin(0))...(match.end(0))
+					if @joined[i] == ' '
+						chunk, offset = _find_chunk_and_offset(i)
 						chunk[offset] = '~'
 					end
 				end

data/lib/ovec/version.rb CHANGED Viewed

@@ -1,4 +1,4 @@
 module Ovec
-  VERSION = "0.0.5"
+  VERSION = "0.0.6"
 	NAME_WITH_VERSION = "Ovec #{VERSION}"
 end

data/test/lib/ovec/tier.rb CHANGED Viewed

@@ -6,32 +6,43 @@ module Ovec
 			@tier = Tier.new
 		end
+		private
+		def assert_ties_to(input, output)
+			parser = Ovec::Parser.new(debug: true)
+			tree = parser.parse(input.dup)
+			tm = Ovec::TexManipulator.new
+			tm.bind(tree)
+			tm.run_text_manipulator(@tier)
+			text = tree.to_tex
+			assert_equal output, text
+		end
+		public
 		def test_basic_without_ties
 			text = "Ahoj. Jak se máš?"
 			text_duplicate = text.dup
-			@tier.bind([text_duplicate])
-			@tier.run
-			assert_equal text, text_duplicate
-		end
-		private
-		def assert_ties_to(input, output)
-			input = [input] if input.is_a? String
-			output = [output] if output.is_a? String
-			text = input.dup
-			@tier.bind(test)
-			@tier.run
-			assert_equal text, outpu
+			parser = Ovec::Parser.new(debug: true)
+			tree = parser.parse(text)
+			tm = Ovec::TexManipulator.new
+			tm.bind(tree)
+			tm.run_text_manipulator(@tier)
+			text = tree.to_tex
+			assert_equal text, text_duplicate
 		end
 		def test_simple_tie
 			assert_ties_to "K blabla u blabla s blabla.", "K~blabla u~blabla s~blabla."
 		end
-		def test_array_tie
-			assert_ties_to [ "K blabla u", " blabla ", "s blabla.", " A blabla?" ], [ "K~blabla u", "~blabla ", "s~blabla.", " A~blabla?" ]
-		end
 		def test_regex_works
 			regex = Tier::REGEX
 			assert !("ahoj" =~ regex)
@@ -50,7 +61,7 @@ module Ovec
 		end
 		def test_tie_across_newline
-			assert_ties_to "Pojednani pojednavajici\no pojednavani.", "Pojednavani pojednavajici\no~pojednavani."
+			assert_ties_to "Pojednavani pojednavajici\no pojednavani.", "Pojednavani pojednavajici\no~pojednavani."
 		end
 		def test_tie_a_after_pause
@@ -65,5 +76,16 @@ module Ovec
 		def test_tie_various
 			assert_ties_to "Je-li x sudé, je dělitelné dvěma (v opačném případě není).", "Je-li x sudé, je dělitelné dvěma (v~opačném případě není)."
 		end
+		def test_tie_in_newline
+			assert_ties_to "V\nrámci\ntohohle", "V~rámci\ntohohle"
+			assert_ties_to "V\nrámci tohohle", "V~rámci tohohle"
+		end
+		def test_date_regex_ok
+			assert "10" =~ /\A\p{Nd}*\Z/
+			assert "1. 3. 2013" =~ Tier::DATE_REGEX
+			assert "Bylo zrovna 1. 3. 2013." =~ Tier::DATE_REGEX
+		end
 	end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: ovec
 version: !ruby/object:Gem::Version
-  version: 0.0.5
+  version: 0.0.6
 platform: ruby
 authors:
 - Michal Pokorný