biblicit 2.0.7 → 2.0.8
Sign up to get free protection for your applications and to get access to all the features.
data/lib/biblicit/version.rb
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
# encoding: UTF-8
|
3
3
|
#Author Nguyen Thuy Dung
|
4
4
|
require 'find'
|
5
|
+
require "#{File.dirname(__FILE__)}/forceUtf8"
|
6
|
+
|
5
7
|
#get relative pos in integer, values range from 0-10
|
6
8
|
def getPos (val)
|
7
9
|
if val == 0
|
@@ -26,28 +28,6 @@ def getHeader(str)
|
|
26
28
|
return str.downcase
|
27
29
|
end
|
28
30
|
|
29
|
-
# Converts a string to UTF-8. If the string is already valid UTF-8, it just
|
30
|
-
# marks it as such. If the string isn't valid UTF-8, we assume that it's
|
31
|
-
# ISO-8859-1 or Windows-1252 and convert it. If it's not valid in that encoding
|
32
|
-
# either, we just strip all non-UTF-8 characters and call it a day.
|
33
|
-
#
|
34
|
-
# Destructive!
|
35
|
-
def force_utf8!(string)
|
36
|
-
string.force_encoding "UTF-8"
|
37
|
-
return string if string.valid_encoding?
|
38
|
-
|
39
|
-
begin
|
40
|
-
string.force_encoding "Windows-1252" # common superset of 8859-1
|
41
|
-
string.encode! "UTF-8"
|
42
|
-
rescue Encoding::InvalidByteSequenceError,
|
43
|
-
Encoding::UndefinedConversionError
|
44
|
-
string.force_encoding "UTF-8"
|
45
|
-
string.encode! "UTF-16",
|
46
|
-
invalid: :replace, undef: :replace, replace: ""
|
47
|
-
string.encode! "UTF-8"
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
31
|
f = File.open("#{ARGV[0]}")
|
52
32
|
hea_array = Array.new
|
53
33
|
ahea_array = Array.new
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Converts a string to UTF-8. If the string is already valid UTF-8, it just
|
4
|
+
# marks it as such. If the string isn't valid UTF-8, we assume that it's
|
5
|
+
# ISO-8859-1 or Windows-1252 and convert it. If it's not valid in that encoding
|
6
|
+
# either, we just strip all non-UTF-8 characters and call it a day.
|
7
|
+
#
|
8
|
+
# Destructive!
|
9
|
+
def force_utf8!(string)
|
10
|
+
string.force_encoding "UTF-8"
|
11
|
+
return string if string.valid_encoding?
|
12
|
+
|
13
|
+
begin
|
14
|
+
string.force_encoding "Windows-1252" # common superset of 8859-1
|
15
|
+
string.encode! "UTF-8"
|
16
|
+
rescue Encoding::InvalidByteSequenceError,
|
17
|
+
Encoding::UndefinedConversionError
|
18
|
+
string.force_encoding "UTF-8"
|
19
|
+
string.encode! "UTF-16",
|
20
|
+
invalid: :replace, undef: :replace, replace: ""
|
21
|
+
string.encode! "UTF-8"
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
@@ -10,6 +10,8 @@ pwd = File.dirname(__FILE__)
|
|
10
10
|
@DATA = "#{pwd}/../../resources/sectLabel/"
|
11
11
|
@TEST_DIR = "/tmp/"
|
12
12
|
|
13
|
+
require "#{@SRC}/forceUtf8"
|
14
|
+
|
13
15
|
name = "#{Time.now.to_i}-#{Process.pid}"
|
14
16
|
|
15
17
|
cmd = "ruby #{@SRC}/extractFeature.rb #{ARGV[0]} > #{@TEST_DIR}/#{name}.test"
|
@@ -28,7 +30,7 @@ end
|
|
28
30
|
|
29
31
|
f = File.open("#{@TEST_DIR}/#{name}.out")
|
30
32
|
while !f.eof do
|
31
|
-
str = f.gets.chomp.strip
|
33
|
+
str = force_utf8!(f.gets).chomp.strip
|
32
34
|
if str != ""
|
33
35
|
l = str.split(" ")
|
34
36
|
output = l.at(l.length-1)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biblicit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.7
|
4
|
+
version: 2.0.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -401,6 +401,7 @@ files:
|
|
401
401
|
- parscit/bin/sectLabel/README.txt
|
402
402
|
- parscit/bin/sectLabel/genericSect/crossValidation.rb
|
403
403
|
- parscit/bin/sectLabel/genericSect/extractFeature.rb
|
404
|
+
- parscit/bin/sectLabel/genericSect/forceUtf8.rb
|
404
405
|
- parscit/bin/sectLabel/genericSectExtract.rb
|
405
406
|
- parscit/bin/sectLabel/getStructureInfo.pl
|
406
407
|
- parscit/bin/sectLabel/redo.sectLabel.pl
|