epos 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +674 -0
- data/README.md +45 -0
- data/README.pt-br.md +43 -0
- data/epico.png +0 -0
- data/epos.gemspec +20 -0
- data/examples/web.rb +9 -0
- data/lib/epos/cp/f1.rb +14 -0
- data/lib/epos/cp/f10.rb +18 -0
- data/lib/epos/cp/f11.rb +51 -0
- data/lib/epos/cp/f12.rb +19 -0
- data/lib/epos/cp/f13.rb +21 -0
- data/lib/epos/cp/f16.rb +8 -0
- data/lib/epos/cp/f2.rb +55 -0
- data/lib/epos/cp/f3.rb +19 -0
- data/lib/epos/cp/f4.rb +19 -0
- data/lib/epos/cp/f6.rb +34 -0
- data/lib/epos/cp/f7.rb +19 -0
- data/lib/epos/cp/f8.rb +29 -0
- data/lib/epos/cp/f9.rb +14 -0
- data/lib/epos/data-file.rb +28 -0
- data/lib/epos/dictionary.rb +104 -0
- data/lib/epos/encoded-file.rb +38 -0
- data/lib/epos/entry-parser.rb +157 -0
- data/lib/epos/html/attrs.slim +14 -0
- data/lib/epos/html/defin-body.slim +8 -0
- data/lib/epos/html/defins.slim +8 -0
- data/lib/epos/html/entry.slim +7 -0
- data/lib/epos/html/extra-tab.slim +3 -0
- data/lib/epos/html/headword.slim +14 -0
- data/lib/epos/html/idioms-tab.slim +8 -0
- data/lib/epos/html/senses-tab.slim +8 -0
- data/lib/epos/html/style.css +54 -0
- data/lib/epos/html/symbols-tab.slim +4 -0
- data/lib/epos/html-formatter.rb +109 -0
- data/lib/epos/index-file.rb +35 -0
- data/lib/epos/indexed-data-file.rb +25 -0
- data/lib/epos/search.rb +7 -0
- data/lib/epos/text-parser.rb +184 -0
- data/lib/epos.rb +9 -0
- metadata +97 -0
@@ -0,0 +1,157 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos

  # Parses the line-oriented source text of one dictionary entry into a
  # nested Hash.
  #
  # Each line starts with a one-character code; the remainder of the line,
  # stripped of surrounding whitespace, is that code's payload. Codes either
  # fill a field of the entry itself, append a "tab" to the entry, open a
  # division ("B") or a definition ("A"), or fill a field of the division or
  # definition that is currently open.
  class EntryParser

    # Tab type appended to entry[:tabs] for each digit line code.
    TAB_TYPES = {
      "0" => :grammar,
      "1" => :grammar_usage,
      "2" => :usage,
      "3" => :etymology,
      "4" => :synonyms,
      "5" => :antonyms,
      "6" => :collective,
      "7" => :homonyms,
      "8" => :paronyms,
      "9" => :animals,
    }.freeze

    # Parses +text+ and returns the entry Hash.
    #
    # Blank (whitespace-only) lines are skipped.
    #
    # Raises RuntimeError when a line starts with an unrecognized code, or
    # when a division/definition code appears before the "B"/"A" line that
    # must open its container.
    def parse(text)
      entry = {
        :headword => nil,
        :n => nil,
        :trademark => nil,
        :translation => nil,
        :pronunciation => nil,
        :language => nil,
        :first_use => nil,
        :orthoepy => nil,
        :variants => [],
        :old_spelling => nil,
        :see => false,
        :tabs => [],
        :divs => [],
      }

      div = nil     # division opened by the most recent "B" line
      defin = nil   # definition opened by the most recent "A" line
      either = nil  # whichever of the two was opened last

      text.lines.each do |line|
        # A blank line carries no code; skip it instead of failing on it.
        next if line.strip.empty?

        code = line[0]
        suffix = line[1..-1].strip

        case code

        # Entry-level fields.
        when "*"
          entry[:headword] = suffix
        when ">"
          entry[:see] = true
        when "t"
          entry[:translation] = suffix
        when "p"
          entry[:pronunciation] = suffix
        when "n"
          entry[:n] = suffix.to_i
        when "L"
          entry[:language] = suffix.split("|")
        when "®"
          entry[:trademark] = true
        when "d"
          entry[:first_use] = suffix
        when "o"
          entry[:orthoepy] = suffix
        when "M"
          entry[:variants] = suffix.split("|")
        when "$"
          entry[:old_spelling] = suffix
        when *TAB_TYPES.keys
          entry[:tabs] << {:type => TAB_TYPES[code], :text => suffix}

        # Division and its fields.
        when "B"
          div = new_div
          entry[:divs] << div
          either = div
          defin = nil
        when "C"
          require_open(div, code, "B")[:pos] = suffix.split("|")
        when "#"
          require_open(div, code, "B")[:idiom] = suffix
        when "P"
          require_open(div, code, "B")[:plural] = suffix
        when "s"
          require_open(div, code, "B")[:symbols] = true

        # Definition and its fields.
        when "A"
          parent = require_open(div, code, "B")
          # The definition copies the entry's :see flag as it stands now.
          defin = new_defin(entry[:see])
          either = defin
          parent[:defins] << defin
        when "-"
          require_open(defin, code, "A")[:num] = suffix
        when ":"
          require_open(defin, code, "A")[:text] = suffix
        when "r"
          require_open(defin, code, "A")[:transty] = suffix.split("|")
        when "<"
          require_open(defin, code, "A")[:examples] = suffix.split("|")
        when "!"
          require_open(defin, code, "A")[:note] = suffix

        # Fields shared by divisions and definitions: they go to whichever
        # was opened most recently.
        when "D"
          require_open(either, code, "B or A")[:derivation] = suffix.split("|")
        when "T"
          require_open(either, code, "B or A")[:field] = suffix.split("|")
        when "U"
          require_open(either, code, "B or A")[:register] = suffix.split("|")
        when "E"
          require_open(either, code, "B or A")[:freq] = suffix.split("|")
        when "R"
          require_open(either, code, "B or A")[:regional] = suffix.split("|")
        when "I"
          require_open(either, code, "B or A")[:temporal] = suffix.split("|")

        # Deliberately ignored codes.
        when "c" # hidden, apparently
        when "S" # unknown
        when "v" # unknown
        when "\\" # coding error
        else
          raise "unrecognized entry line code #{code.inspect}: #{code}#{suffix}"
        end
      end

      return entry
    end

    private

    # Returns a fresh, empty division Hash.
    def new_div
      {
        :pos => nil,
        :idiom => nil,
        :field => nil,
        :regional => nil,
        :register => nil,
        :temporal => nil,
        :freq => nil,
        :plural => nil,
        :symbols => nil,
        :derivation => nil,
        :defins => [],
      }
    end

    # Returns a fresh, empty definition Hash whose :see flag is +see+.
    def new_defin(see)
      {
        :num => nil,
        :text => nil,
        :transty => nil,
        :examples => [],
        :note => nil,
        :derivation => nil,
        :field => nil,
        :register => nil,
        :freq => nil,
        :regional => nil,
        :temporal => nil,
        :see => see,
      }
    end

    # Returns +target+, or raises RuntimeError when it is nil because no
    # +opener+ line has been seen before the line starting with +code+.
    def require_open(target, code, opener)
      raise "entry line code #{code.inspect} appears before any #{opener} line" if target.nil?
      target
    end

  end

end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
- [ATTRS1, ATTRS2, ATTRS3, ATTRS4, ATTRS5].each do |attrs|
|
2
|
+
- existing = attrs.select{|name| data[name]}
|
3
|
+
- if existing.length > 0
|
4
|
+
p.attrs
|
5
|
+
- existing.each_with_index do |(name, label), index|
|
6
|
+
- if index > 0
|
7
|
+
= '; '
|
8
|
+
- if label
|
9
|
+
span.label = label
|
10
|
+
- if data[name].is_a?(String) || data[name].is_a?(Array)
|
11
|
+
- if label
|
12
|
+
= ' '
|
13
|
+
span class=('value ' + name.to_s.gsub(/_/, '-'))
|
14
|
+
== render_text(data[name].is_a?(String) ? data[name] : data[name][@string_index])
|
@@ -0,0 +1,7 @@
|
|
1
|
+
div.epos
|
2
|
+
== render(:headword, data)
|
3
|
+
== render(:senses_tab, data[:divs].reject{|div| div[:idiom] || div[:symbols]})
|
4
|
+
== render(:symbols_tab, data[:divs].select{|div| div[:symbols]})
|
5
|
+
== render(:idioms_tab, data[:divs].select{|div| div[:idiom]})
|
6
|
+
- data[:tabs].each do |tab|
|
7
|
+
== render(:extra_tab, tab)
|
@@ -0,0 +1,14 @@
|
|
1
|
+
h1
|
2
|
+
= '■ '
|
3
|
+
- if data[:n]
|
4
|
+
sup = data[:n]
|
5
|
+
- if data[:language]
|
6
|
+
i == render_text(data[:headword])
|
7
|
+
- else
|
8
|
+
== render_text(data[:headword])
|
9
|
+
- vars = data[:variants].map{|var| render_text(var)}
|
10
|
+
- if vars.size > 0
|
11
|
+
== vars[0..-2].map{|alt| ", #{alt}"}.join
|
12
|
+
== " #{OR} "
|
13
|
+
== vars.last
|
14
|
+
== render(:attrs, data)
|
@@ -0,0 +1,54 @@
|
|
1
|
+
.epos{
|
2
|
+
font-family: "Noto Sans", "DejaVu Sans", sans-serif;
|
3
|
+
color: #333333;
|
4
|
+
}
|
5
|
+
|
6
|
+
.epos h1, .epos h2, .epos h3{
|
7
|
+
margin-bottom: 0;
|
8
|
+
font-weight: bold;
|
9
|
+
}
|
10
|
+
.epos h3{
|
11
|
+
margin: 0;
|
12
|
+
}
|
13
|
+
.epos h1{
|
14
|
+
font-size: 120%;
|
15
|
+
}
|
16
|
+
.epos h2{
|
17
|
+
font-size: 110%;
|
18
|
+
}
|
19
|
+
.epos h3{
|
20
|
+
font-size: 100%;
|
21
|
+
}
|
22
|
+
|
23
|
+
.epos td{
|
24
|
+
padding: 0;
|
25
|
+
}
|
26
|
+
.epos td.num{
|
27
|
+
font-weight: bold;
|
28
|
+
padding-right: 5px;
|
29
|
+
}
|
30
|
+
|
31
|
+
.epos p{
|
32
|
+
margin: 0;
|
33
|
+
}
|
34
|
+
.epos p.example{
|
35
|
+
color: #006600;
|
36
|
+
font-style: italic;
|
37
|
+
}
|
38
|
+
.epos .no-bold{
|
39
|
+
font-weight: normal;
|
40
|
+
}
|
41
|
+
|
42
|
+
.epos span.label{
|
43
|
+
font-style: italic;
|
44
|
+
color: #2e3e8c;
|
45
|
+
}
|
46
|
+
.epos span.value{
|
47
|
+
color: #666666;
|
48
|
+
}
|
49
|
+
.epos span.value.transty{
|
50
|
+
color: #aaaa55;
|
51
|
+
}
|
52
|
+
.epos span.value.note{
|
53
|
+
color: #aa5555;
|
54
|
+
}
|
@@ -0,0 +1,109 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'slim'
|
3
|
+
require 'epos/text-parser.rb'
|
4
|
+
|
5
|
+
module Epos

  # Renders parsed dictionary entries as HTML using the Slim templates stored
  # in the "html" directory next to this file.
  class HtmlFormatter

    # unmarked:: stored in @unmarked; not read by any code in this class.
    # compact::  selects which element of an Array-valued attribute the attrs
    #            template prints (index -2 when true, -1 otherwise).
    def initialize(unmarked: false, compact: false)
      @unmarked = unmarked
      @string_index = compact ? -2 : -1
      @parser = TextParser.new
      @html_base = File.join(File.dirname(__FILE__), "html")

      load_templates()
    end

    # Returns the HTML produced by the :entry template for +entry+.
    def format(entry)
      render(:entry, entry)
    end

    # Returns the contents of the bundled style.css.
    def style
      File.read(File.join(@html_base, "style.css"))
    end

    protected

    # Compiles every template once and stores them in @templates, keyed by
    # symbol. The file name is the symbol with "_" replaced by "-".
    def load_templates
      slim = Tilt["slim"]

      @templates = [:extra_tab, :symbols_tab, :idioms_tab, :senses_tab, :defins, :defin_body, :attrs, :headword, :entry]
      @templates = @templates.map{|sym| [sym, slim.new(File.join(@html_base, sym.to_s.gsub(/_/, "-") + ".slim"))]}.to_h
    end

    # Renders template +item+ with this formatter as the evaluation scope and
    # +data+ exposed to the template as the local "data".
    def render(item, data)
      @templates[item].render(self, :data => data)
    end

    # Wraps +fragment+ in the HTML tags for every key of the +format+ Hash,
    # opened in key order and closed in reverse order.
    #
    # Raises RuntimeError naming the offending keys when +format+ contains a
    # key that FORMATS does not know.
    #
    # NOTE(review): +fragment+ is inserted as-is, without HTML escaping.
    # Whether the parser output can contain "<" or "&" is not visible from
    # this file -- confirm before relying on the result being valid HTML.
    def render_fragment(fragment, format)
      unknown = format.keys.reject{|name| FORMATS.has_key?(name)}
      raise "unknown text format(s): #{unknown.join(', ')}" unless unknown.empty?

      tags = format.keys.map{|name| FORMATS[name]}

      opening = tags.map{|tag| "<" + tag[:name] + (tag[:style] ? " style='#{tag[:style]}'" : "") + ">"}.join
      closing = tags.reverse.map{|tag| "</" + tag[:name] + ">"}.join

      return opening + fragment + closing
    end

    # Parses marked-up +text+ into fragments and returns their concatenated
    # HTML.
    def render_text(text)
      @parser.parse(text).map{|fragment, format| render_fragment(fragment, format)}.join
    end

    # HTML tag (and optional inline style) used for each text format flag.
    FORMATS = {
      "i" => {:name => "i"},
      "b" => {:name => "b"},
      "super" => {:name => "sup"},
      "sub" => {:name => "sub"},
      "ul" => {:name => "span", :style => "text-decoration: underline;"},
      "f5" => {:name => "span", :style => "font-variant: small-caps;"},
      "strike" => {:name => "span", :style => "text-decoration: line-through;"},
    }

    # Attribute groups iterated by the attrs template; each maps a data key
    # to the label printed before its value (nil means no label).
    ATTRS1 = {
      :field => "rubrica",
      :regional => "regionalismo",
      :register => "uso",
      :temporal => "diacronismo",
      :freq => "estatística",
      :derivation => "derivação",
    }
    ATTRS2 = {
      :note => nil,
    }
    ATTRS3 = {
      :transty => nil,
    }
    ATTRS4 = {
      :old_spelling => "forma antiga",
      :trademark => "marca registrada",
      :language => "língua",
      :translation => "tradução",
      :pronunciation => "pronúncia",
      :first_use => "datação",
      :orthoepy => "ortoépia",
    }
    ATTRS5 = {
      :plural => "plural",
    }

    # Display title for each tab type.
    TABS = {
      :senses => "acepções",
      :idioms => "locuções",
      :symbols => "símbolos e abreviações",
      :grammar => "gramática",
      :grammar_usage => "gramática e uso",
      :usage => "uso",
      :etymology => "etimologia",
      :synonyms => "sinônimos",
      :antonyms => "antônimos",
      :collective => "coletivos",
      :homonyms => "homônimos",
      :paronyms => "parônimos",
      :animals => "vozes de animais",
    }

    # Conjunction placed before the last headword variant.
    OR = "ou"

  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'epos/encoded-file.rb'
|
2
|
+
|
3
|
+
module Epos

  # In-memory index built from a file of "key;number" lines. A key may occur
  # on several lines; its numbers are collected in file order.
  class IndexFile

    # Reads +path+ through EncodedFile.read and builds the index.
    #
    # Blank lines are skipped. Raises ArgumentError for a non-blank line that
    # contains no ";" separator.
    def initialize(path)
      data = EncodedFile.read(path)
      @index = {}

      data.lines.each do |line|
        next if line.strip.empty?

        sep = line.index(";")
        raise ArgumentError, "malformed index line (no ';'): #{line.inspect}" if sep.nil?

        # Exclusive range: a ";" in column 0 yields an empty key rather than
        # the whole line.
        key = line[0...sep]
        # to_i ignores the trailing line terminator, so the value is read
        # correctly whether or not the line ends with a newline.
        val = line[(sep + 1)..-1].to_i

        (@index[key] ||= []) << val
      end
    end

    # Returns the Array of numbers recorded for +key+, or [] when +key+ is
    # not in the index.
    def look_up(key)
      return @index[key] || []
    end

    # Returns every key in the index.
    def keys
      return @index.keys
    end

    # Returns true when +key+ is in the index.
    def has_key?(key)
      @index.has_key?(key)
    end

  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'epos/index-file.rb'
|
2
|
+
require 'epos/data-file.rb'
|
3
|
+
|
4
|
+
module Epos

  # Couples an IndexFile with a DataFile so that entries can be fetched by
  # key.
  class IndexedDataFile

    # Opens the index at +index_path+ and the data file at +data_path+.
    def initialize(index_path, data_path)
      @index = IndexFile.new(index_path)
      @data = DataFile.new(data_path)
    end

    # Returns one entry, read from the data file, for each position the
    # index holds under +key+.
    def look_up(key)
      positions = @index.look_up(key)
      positions.map do |position|
        @data.read_entry(position)
      end
    end

    # Returns the keys known to the index.
    def keys
      @index.keys
    end

    # Tells whether the index contains +key+.
    def has_key?(key)
      @index.has_key?(key)
    end

  end
end
|
data/lib/epos/search.rb
ADDED
@@ -0,0 +1,184 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'epos/cp/f1.rb'
|
3
|
+
require 'epos/cp/f2.rb'
|
4
|
+
require 'epos/cp/f3.rb'
|
5
|
+
require 'epos/cp/f4.rb'
|
6
|
+
require 'epos/cp/f6.rb'
|
7
|
+
require 'epos/cp/f7.rb'
|
8
|
+
require 'epos/cp/f8.rb'
|
9
|
+
require 'epos/cp/f9.rb'
|
10
|
+
require 'epos/cp/f10.rb'
|
11
|
+
require 'epos/cp/f11.rb'
|
12
|
+
require 'epos/cp/f12.rb'
|
13
|
+
require 'epos/cp/f13.rb'
|
14
|
+
require 'epos/cp/f16.rb'
|
15
|
+
|
16
|
+
module Epos

  # Splits RTF-like marked-up text into [fragment, format] pairs, where
  # +format+ is a Hash of the formatting flags in effect for that fragment.
  #
  # The input is scanned one character at a time by a small state machine:
  # "{" and "}" open and close formatting groups, "\" starts either an
  # escaped character, a two-digit hexadecimal character code (\'hh) or a
  # command word.
  class TextParser

    # Parses +text+ and returns an Array of [String, Hash] pairs in input
    # order. Empty fragments may be present in the result.
    def parse(text)
      @result = []
      @format = [{}]   # stack of format Hashes, one per open group
      @fragment = ""
      @cmd = ""

      s = :reading_fragment
      code = ""

      text.each_char do |c|

        case s

        when :reading_fragment
          case c
          when "\\"
            s = :escape_started
          when "{"
            self.flush
            @format << @format.last.clone
          when "}"
            self.flush
            @format.pop if @format.length > 1 # Entry "bum-bum" is broken.
          else
            @fragment << c
          end

        when :escape_started
          case c
          when "\\", "{", "}"
            # Escaped backslash or brace: a literal character, not markup.
            @fragment << c
            s = :reading_fragment
          when "'"
            code = ""
            s = :reading_code
          else
            @cmd = c
            s = :reading_command
          end

        when :reading_command
          case c
          when " "
            # The space only terminates the command word; it is not text.
            self.command
            s = :reading_fragment
          when "\\"
            self.command
            s = :escape_started
          when /[a-z0-9]/
            @cmd << c
          when "{"
            self.command
            self.flush
            @format << @format.last.clone
            s = :reading_fragment
          when "}"
            self.command
            self.flush
            # Same guard as in :reading_fragment: an unbalanced "}" must not
            # empty the stack, or the next flush would fail on nil.
            @format.pop if @format.length > 1
            s = :reading_fragment
          else
            self.command
            @fragment << c
            s = :reading_fragment
          end

        when :reading_code
          code << c
          if code.length == 2
            @fragment << [code.to_i(16)].pack("U")
            s = :reading_fragment
          end
        end
      end

      self.command if s == :reading_command
      self.flush

      return @result

    end

    protected

    # Character map used for each font command word.
    CODE_PAGES = {
      "f1" => CodePage::F1_MAP,
      "f2" => CodePage::F2_MAP,
      "f3" => CodePage::F3_MAP,
      "f4" => CodePage::F4_MAP,
      "f6" => CodePage::F6_MAP,
      "f7" => CodePage::F7_MAP,
      "f8" => CodePage::F8_MAP,
      "f9" => CodePage::F9_MAP,
      "f10" => CodePage::F10_MAP,
      "f11" => CodePage::F11_MAP,
      "f12" => CodePage::F12_MAP,
      "f13" => CodePage::F13_MAP,
      "f16" => CodePage::F16_MAP,
    }

    # Returns +text+ with every character translated through code page +f+.
    #
    # A character missing from the map is kept as-is only for "f1", and only
    # when its code point lies outside 0x80..0xa0. Otherwise a RuntimeError
    # is raised; an unknown code page also raises once there is a character
    # to convert.
    def convert_encoding(text, f)
      s = ""
      cp = CODE_PAGES[f]
      text.each_char do |c|
        raise "unknown code page: #{f}" if cp.nil?

        case
        when cp[c]
          s << cp[c]
        when f == "f1" && !(0x80..0xa0).include?(c.ord)
          s << c
        else
          raise "#{f}:#{c}:#{c.ord.to_s(16)}"
        end
      end
      s
    end

    # Appends the pending fragment, with its format, to the result and
    # starts a new empty fragment.
    #
    # NOTE(review): "caps" and "f" are removed from the current group's
    # format here, so they only affect the fragment being flushed, not later
    # fragments of the same group. This is kept as found -- confirm it is
    # intended.
    def flush
      format = @format.last

      # This happens *once* ("mico-leão") in all of Houaiss.
      @fragment.upcase! if format["caps"]
      format.delete("caps")

      @fragment = self.convert_encoding(@fragment, format["f"] || "f1")
      format.delete("f")

      # Store a snapshot: the live Hash keeps being changed by later
      # commands in the same group and must not alter this fragment.
      @result << [@fragment, format.dup]
      @fragment = ""
    end

    # Applies the command word collected in @cmd to the current format, or
    # appends the character it stands for to the pending fragment.
    def command
      # Text read so far must keep the format it was read under.
      self.flush if @fragment.length > 0

      case
      when @cmd =~ /f[0-9][0-9]?/ && @cmd != "f5"
        @cmd = "f1" if @cmd == "f0"
        @format.last["f"] = @cmd

      when ["lang1023", "lang1046", "ltrpar", "li100", "sa100", "sb100", "fs20", "fs22", "fs24"].include?(@cmd)
        # what is this i don't even

      when @cmd == "ulnone" then @format.last.delete("ul")
      when @cmd == "i0" then @format.last.delete("i")
      when @cmd == "caps0" then @format.last.delete("caps")
      when @cmd == "strike0" then @format.last.delete("strike")
      when @cmd == "nosupersub"
        @format.last.delete("super")
        @format.last.delete("sub")

      when @cmd == "bullet" then @fragment << "·"
      when @cmd == "lquote" then @fragment << "‘"
      when @cmd == "rquote" then @fragment << "’"
      when @cmd == "ldblquote" then @fragment << '“'
      when @cmd == "rdblquote" then @fragment << '”'

      else
        # Any other word becomes a boolean format flag.
        @format.last[@cmd] = true
      end

    end

  end
end
|
data/lib/epos.rb
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
require 'epos/data-file.rb'
|
2
|
+
require 'epos/dictionary.rb'
|
3
|
+
require 'epos/encoded-file.rb'
|
4
|
+
require 'epos/entry-parser.rb'
|
5
|
+
require 'epos/html-formatter.rb'
|
6
|
+
require 'epos/indexed-data-file.rb'
|
7
|
+
require 'epos/index-file.rb'
|
8
|
+
require 'epos/search.rb'
|
9
|
+
require 'epos/text-parser.rb'
|