epos 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,157 @@
1
+ # -*- coding: utf-8 -*-
2
module Epos

  # Converts the line-coded source text of one dictionary entry into a tree of
  # hashes: the entry itself, its divisions (:divs) and, inside each division,
  # its definitions (:defins).
  #
  # Every input line starts with a one-character code; the remainder of the
  # line, stripped of surrounding whitespace, is the payload for that code.
  class EntryParser

    # Entry-level codes whose payload is stored as a plain string.
    ENTRY_TEXT_CODES = {
      "*" => :headword,
      "t" => :translation,
      "p" => :pronunciation,
      "d" => :first_use,
      "o" => :orthoepy,
      "$" => :old_spelling,
    }

    # Entry-level codes whose payload is a "|"-separated list.
    ENTRY_LIST_CODES = {
      "L" => :language,
      "M" => :variants,
    }

    # Entry-level codes that only set a flag; their payload is ignored.
    ENTRY_FLAG_CODES = {
      ">" => :see,
      "®" => :trademark,
    }

    # Digit codes; each one appends an extra tab of the given type.
    TAB_CODES = {
      "0" => :grammar,
      "1" => :grammar_usage,
      "2" => :usage,
      "3" => :etymology,
      "4" => :synonyms,
      "5" => :antonyms,
      "6" => :collective,
      "7" => :homonyms,
      "8" => :paronyms,
      "9" => :animals,
    }

    # Codes that store a plain string on the current division.
    DIV_TEXT_CODES = {
      "#" => :idiom,
      "P" => :plural,
    }

    # Codes that store a plain string on the current definition.
    DEFIN_TEXT_CODES = {
      "-" => :num,
      ":" => :text,
      "!" => :note,
    }

    # Codes that store a "|"-separated list on the current definition.
    DEFIN_LIST_CODES = {
      "r" => :transty,
      "<" => :examples,
    }

    # Codes that store a "|"-separated list on whichever record was opened
    # last: the current division or the current definition.
    SHARED_LIST_CODES = {
      "D" => :derivation,
      "T" => :field,
      "U" => :register,
      "E" => :freq,
      "R" => :regional,
      "I" => :temporal,
    }

    # Codes that are accepted but leave no trace in the result:
    # "c" (hidden, apparently), "S" and "v" (unknown), "\\" (coding error).
    IGNORED_CODES = ["c", "S", "v", "\\"]

    # Parses +text+, one coded line at a time, and returns the entry hash.
    #
    # Codes that address the current division or definition assume that a
    # "B" or "A" line has opened one earlier in the text.
    #
    # Raises RuntimeError, with the offending code and payload as message,
    # when a line starts with a code that is not recognised.
    def parse(text)
      entry = blank_entry
      div = defin = either = nil

      text.lines.each do |line|
        code = line[0]
        suffix = line[1..-1].strip

        case code
        when *ENTRY_TEXT_CODES.keys
          entry[ENTRY_TEXT_CODES[code]] = suffix
        when *ENTRY_LIST_CODES.keys
          entry[ENTRY_LIST_CODES[code]] = suffix.split("|")
        when *ENTRY_FLAG_CODES.keys
          entry[ENTRY_FLAG_CODES[code]] = true
        when "n"
          entry[:n] = suffix.to_i
        when *TAB_CODES.keys
          entry[:tabs] << {:type => TAB_CODES[code], :text => suffix}

        when "B"
          # A new division starts; no definition is open inside it yet.
          div = blank_div
          entry[:divs] << div
          either = div
          defin = nil
        when "C"
          div[:pos] = suffix.split("|")
        when *DIV_TEXT_CODES.keys
          div[DIV_TEXT_CODES[code]] = suffix
        when "s"
          div[:symbols] = true

        when "A"
          # A new definition starts inside the current division; it copies
          # the entry's :see flag as it stands at this point.
          defin = blank_defin(entry[:see])
          either = defin
          div[:defins] << defin
        when *DEFIN_TEXT_CODES.keys
          defin[DEFIN_TEXT_CODES[code]] = suffix
        when *DEFIN_LIST_CODES.keys
          defin[DEFIN_LIST_CODES[code]] = suffix.split("|")
        when *SHARED_LIST_CODES.keys
          either[SHARED_LIST_CODES[code]] = suffix.split("|")

        when *IGNORED_CODES
          # Deliberately skipped.
        else
          raise "#{code}#{suffix}"
        end
      end

      entry
    end

    private

    # Returns an entry record with every field at its initial value.
    def blank_entry
      {
        :headword => nil,
        :n => nil,
        :trademark => nil,
        :translation => nil,
        :pronunciation => nil,
        :language => nil,
        :first_use => nil,
        :orthoepy => nil,
        :variants => [],
        :old_spelling => nil,
        :see => false,
        :tabs => [],
        :divs => [],
      }
    end

    # Returns a division record with every field at its initial value.
    def blank_div
      {
        :pos => nil,
        :idiom => nil,
        :field => nil,
        :regional => nil,
        :register => nil,
        :temporal => nil,
        :freq => nil,
        :plural => nil,
        :symbols => nil,
        :derivation => nil,
        :defins => [],
      }
    end

    # Returns a definition record with every field at its initial value;
    # +see+ becomes the record's :see flag.
    def blank_defin(see)
      {
        :num => nil,
        :text => nil,
        :transty => nil,
        :examples => [],
        :note => nil,
        :derivation => nil,
        :field => nil,
        :register => nil,
        :freq => nil,
        :regional => nil,
        :temporal => nil,
        :see => see,
      }
    end

  end

end
@@ -0,0 +1,14 @@
1
/ Prints one paragraph per attribute group (ATTRS1..ATTRS5) that has at least
/ one attribute set in `data`. Attributes are separated by '; '; a label is
/ printed when the group defines one, and the value is printed only when it
/ is a String or an Array (arrays are indexed with @string_index).
- [ATTRS1, ATTRS2, ATTRS3, ATTRS4, ATTRS5].each do |group|
  - present = group.select { |key| data[key] }
  - unless present.empty?
    p.attrs
      - present.each_with_index do |(key, caption), position|
        - if position > 0
          = '; '
        - if caption
          span.label = caption
        - value = data[key]
        - if value.is_a?(String) || value.is_a?(Array)
          - if caption
            = ' '
          span class=('value ' + key.to_s.gsub(/_/, '-'))
            == render_text(value.is_a?(String) ? value : value[@string_index])
@@ -0,0 +1,8 @@
1
/ Body of a single definition: its text (preceded by an arrow when the
/ definition is flagged :see), then its attributes, then one paragraph per
/ example.
- if data[:text]
  p.defin
    - if data[:see]
      = "→ "
    == render_text(data[:text])
== render(:attrs, data)
- data[:examples].each do |sample|
  p.example == render_text(sample)
@@ -0,0 +1,8 @@
1
/ List of definitions. A single definition without a number is rendered
/ bare; anything else becomes a two-column table of number and body.
- if data.size == 1 && !data.first[:num]
  == render(:defin_body, data.first)
- else
  table
    - data.each do |item|
      tr
        td.num valign='top' = item[:num]
        td == render(:defin_body, item)
@@ -0,0 +1,7 @@
1
/ Top-level template for one entry: headword, then the senses, symbols and
/ idioms tabs (each fed the divisions that belong to it), then the extra tabs.
div.epos
  == render(:headword, data)
  == render(:senses_tab, data[:divs].reject { |section| section[:idiom] || section[:symbols] })
  == render(:symbols_tab, data[:divs].select { |section| section[:symbols] })
  == render(:idioms_tab, data[:divs].select { |section| section[:idiom] })
  - data[:tabs].each do |extra|
    == render(:extra_tab, extra)
@@ -0,0 +1,3 @@
1
/ One extra tab: a heading looked up in TABS by the tab's type, followed by
/ the tab's text.
- if data
  h2 = TABS[data[:type]]
  == render_text(data[:text])
@@ -0,0 +1,14 @@
1
/ Headword heading: a marker, an optional superscript number (data[:n]), the
/ headword itself (italic when data[:language] is set) and the variants,
/ comma-separated with OR before the last one. The entry attributes follow.
h1
  = '■ '
  - if data[:n]
    sup = data[:n]
  - if data[:language]
    i == render_text(data[:headword])
  - else
    == render_text(data[:headword])
  - variants = data[:variants].map { |variant| render_text(variant) }
  - unless variants.empty?
    == variants[0..-2].map { |variant| ", #{variant}" }.join
    == " #{OR} "
    == variants.last
== render(:attrs, data)
@@ -0,0 +1,8 @@
1
/ Idioms tab: rendered only when there is at least one idiom division. Each
/ idiom gets a heading in which every "@" is replaced by a non-bold OR,
/ followed by the division's attributes and definitions.
- unless data.empty?
  h2 = TABS[:idioms]
  - data.each do |section|
    h3.idiom
      = '‣ '
      == render_text(section[:idiom]).gsub(/@/, "<span class='no-bold'>#{OR}</span>")
    == render(:attrs, section)
    == render(:defins, section[:defins])
@@ -0,0 +1,8 @@
1
/ Senses tab: rendered only when there is at least one division. The tab
/ heading is omitted when @unmarked is set. Each division prints its part of
/ speech (when present), its attributes and its definitions.
- unless data.empty?
  - unless @unmarked
    h2 = TABS[:senses]
  - data.each do |section|
    - if section[:pos]
      h3 = section[:pos][@string_index]
    == render(:attrs, section)
    == render(:defins, section[:defins])
@@ -0,0 +1,54 @@
1
/* Styles for rendered dictionary entries; every rule is scoped to .epos. */
.epos {
  font-family: "Noto Sans", "DejaVu Sans", sans-serif;
  color: #333333;
}

/* Headings: bold, no bottom margin, decreasing size from h1 to h3. */
.epos h1,
.epos h2,
.epos h3 {
  margin-bottom: 0;
  font-weight: bold;
}
.epos h1 {
  font-size: 120%;
}
.epos h2 {
  font-size: 110%;
}
.epos h3 {
  margin: 0;
  font-size: 100%;
}

/* Definition table: number column and body column. */
.epos td {
  padding: 0;
}
.epos td.num {
  font-weight: bold;
  padding-right: 5px;
}

/* Paragraphs and inline helpers. */
.epos p {
  margin: 0;
}
.epos p.example {
  color: #006600;
  font-style: italic;
}
.epos .no-bold {
  font-weight: normal;
}

/* Attribute labels and values. */
.epos span.label {
  font-style: italic;
  color: #2e3e8c;
}
.epos span.value {
  color: #666666;
}
.epos span.value.transty {
  color: #aaaa55;
}
.epos span.value.note {
  color: #aa5555;
}
@@ -0,0 +1,4 @@
1
/ Symbols tab: rendered only when there is at least one symbols division;
/ prints the definitions of each division under a single heading.
- unless data.empty?
  h2 = TABS[:symbols]
  - data.each do |section|
    == render(:defins, section[:defins])
@@ -0,0 +1,109 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'slim'
3
+ require 'epos/text-parser.rb'
4
+
5
module Epos

  # Renders a parsed entry as an HTML fragment, using the Slim templates
  # stored in the "html" directory next to this file.
  class HtmlFormatter

    # Characters that must not reach the HTML output verbatim.
    HTML_ESCAPES = {
      "&" => "&amp;",
      "<" => "&lt;",
      ">" => "&gt;",
    }

    # unmarked:: when true, the senses tab is rendered without its heading.
    # compact::  when true, templates take the second-to-last element of
    #            list-valued fields instead of the last one.
    def initialize(unmarked: false, compact: false)
      @unmarked = unmarked
      @string_index = compact ? -2 : -1
      @parser = TextParser.new
      @html_base = File.join(File.dirname(__FILE__), "html")

      load_templates()
    end

    # Returns the HTML for +entry+, rendered through the :entry template.
    def format(entry)
      render(:entry, entry)
    end

    # Returns the contents of the stylesheet that accompanies the templates.
    def style
      File.read(File.join(@html_base, "style.css"))
    end

    protected

    # Compiles every template once; file names use dashes where the
    # template symbols use underscores.
    def load_templates
      slim = Tilt["slim"]

      @templates = [:extra_tab, :symbols_tab, :idioms_tab, :senses_tab, :defins, :defin_body, :attrs, :headword, :entry]
      @templates = @templates.map{|sym| [sym, slim.new(File.join(@html_base, sym.to_s.gsub(/_/, "-") + ".slim"))]}.to_h
    end

    # Renders template +item+ with this formatter as scope and +data+
    # available to the template as the local `data`.
    def render(item, data)
      @templates[item].render(self, :data => data)
    end

    # Wraps +fragment+ in the HTML tags for every format named in +format+
    # (a Hash keyed by format name) and returns the resulting markup.
    #
    # The fragment is plain text, so "&", "<" and ">" are escaped before it
    # is embedded in the markup.
    #
    # Raises RuntimeError when +format+ names a format missing from FORMATS.
    def render_fragment(fragment, format)
      unknown = format.keys.reject{|name| FORMATS.has_key?(name)}
      raise "unknown text format: #{unknown.join(', ')}" unless unknown.empty?

      tags = format.keys.map{|name| FORMATS[name]}

      opening = tags.map{|tag| "<" + tag[:name] + (tag[:style] ? " style='#{tag[:style]}'" : "") + ">"}.join
      closing = tags.reverse.map{|tag| "</" + tag[:name] + ">"}.join

      opening + fragment.gsub(/[&<>]/, HTML_ESCAPES) + closing
    end

    # Parses the marked-up +text+ and returns it as HTML.
    def render_text(text)
      @parser.parse(text).map{|fragment, format| render_fragment(fragment, format)}.join
    end

    # Format names produced by the text parser and the tag (with optional
    # inline style) each one is rendered as.
    FORMATS = {
      "i" => {:name => "i"},
      "b" => {:name => "b"},
      "super" => {:name => "sup"},
      "sub" => {:name => "sub"},
      "ul" => {:name => "span", :style => "text-decoration: underline;"},
      "f5" => {:name => "span", :style => "font-variant: small-caps;"},
      "strike" => {:name => "span", :style => "text-decoration: line-through;"},
    }

    # Attribute groups, in the order the attrs template prints them. Each
    # maps a data key to its label; a nil label prints the value alone.
    ATTRS1 = {
      :field => "rubrica",
      :regional => "regionalismo",
      :register => "uso",
      :temporal => "diacronismo",
      :freq => "estatística",
      :derivation => "derivação",
    }
    ATTRS2 = {
      :note => nil,
    }
    ATTRS3 = {
      :transty => nil,
    }
    ATTRS4 = {
      :old_spelling => "forma antiga",
      :trademark => "marca registrada",
      :language => "língua",
      :translation => "tradução",
      :pronunciation => "pronúncia",
      :first_use => "datação",
      :orthoepy => "ortoépia",
    }
    ATTRS5 = {
      :plural => "plural",
    }

    # Headings of the tabs, keyed by tab type.
    TABS = {
      :senses => "acepções",
      :idioms => "locuções",
      :symbols => "símbolos e abreviações",
      :grammar => "gramática",
      :grammar_usage => "gramática e uso",
      :usage => "uso",
      :etymology => "etimologia",
      :synonyms => "sinônimos",
      :antonyms => "antônimos",
      :collective => "coletivos",
      :homonyms => "homônimos",
      :paronyms => "parônimos",
      :animals => "vozes de animais",
    }

    # Conjunction placed between alternatives.
    OR = "ou"

  end
end
@@ -0,0 +1,35 @@
1
+ require 'epos/encoded-file.rb'
2
+
3
module Epos

  # In-memory index read from a file with one "key;value" record per line.
  # Each key maps to the list of integer values recorded for it, in file
  # order.
  class IndexFile

    # Reads the index stored at +path+ through EncodedFile.
    #
    # A record is split at its first ";". The value is read with to_i, so a
    # missing newline after the last record does not truncate it, and a
    # record with an empty key is stored under "". Blank lines are skipped.
    #
    # Raises ArgumentError for a non-blank line that has no ";".
    def initialize(path)
      @index = {}
      EncodedFile.read(path).each_line do |line|
        key, separator, value = line.partition(";")
        if separator.empty?
          next if line.strip.empty?
          raise ArgumentError, "malformed index line: #{line.inspect}"
        end
        (@index[key] ||= []) << value.to_i
      end
    end

    # Returns the values recorded for +key+, or an empty array when the key
    # is not in the index.
    def look_up(key)
      @index[key] || []
    end

    # Returns every key in the index.
    def keys
      @index.keys
    end

    # Returns true when +key+ is in the index.
    def has_key?(key)
      @index.has_key?(key)
    end

  end
end
@@ -0,0 +1,25 @@
1
+ require 'epos/index-file.rb'
2
+ require 'epos/data-file.rb'
3
+
4
module Epos

  # Pairs an IndexFile with a DataFile so that entries can be fetched by key:
  # the index supplies the positions, the data file supplies the entries.
  class IndexedDataFile

    # Opens the index at +index_path+ and the data file at +data_path+.
    def initialize(index_path, data_path)
      @index = IndexFile.new(index_path)
      @data = DataFile.new(data_path)
    end

    # Returns the entries stored for +key+, one per position the index holds
    # for it.
    def look_up(key)
      positions = @index.look_up(key)
      positions.map { |position| @data.read_entry(position) }
    end

    # Returns every key known to the index.
    def keys
      @index.keys
    end

    # Returns true when the index contains +key+.
    def has_key?(key)
      @index.has_key?(key)
    end

  end
end
@@ -0,0 +1,7 @@
1
module Epos

  # Placeholder: the method has no implementation, so it ignores +string+
  # and returns nil.
  def self.search(string)
    nil
  end

end
@@ -0,0 +1,184 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'epos/cp/f1.rb'
3
+ require 'epos/cp/f2.rb'
4
+ require 'epos/cp/f3.rb'
5
+ require 'epos/cp/f4.rb'
6
+ require 'epos/cp/f6.rb'
7
+ require 'epos/cp/f7.rb'
8
+ require 'epos/cp/f8.rb'
9
+ require 'epos/cp/f9.rb'
10
+ require 'epos/cp/f10.rb'
11
+ require 'epos/cp/f11.rb'
12
+ require 'epos/cp/f12.rb'
13
+ require 'epos/cp/f13.rb'
14
+ require 'epos/cp/f16.rb'
15
+
16
module Epos

  # Parses the RTF-like markup used in dictionary text into a flat list of
  # plain-text fragments, each paired with the formats in effect for it.
  class TextParser

    # Parses +text+ and returns an Array of [fragment, format] pairs, where
    # +fragment+ is a String and +format+ is a Hash keyed by the names of the
    # formatting commands active for that fragment.
    #
    # Recognised syntax: "{" and "}" open and close a formatting group,
    # "\name" is a command (terminated by a space or by any character that
    # is not a lowercase letter or digit), "\'hh" inserts the character with
    # hexadecimal code hh, and "\\", "\{" and "\}" insert the literal
    # character.
    def parse(text)
      @result = []
      @format = [{}]   # stack of format hashes, one per open group
      @fragment = ""
      @cmd = ""

      state = :reading_fragment
      code = ""

      text.each_char do |c|

        case state

        when :reading_fragment
          case c
          when "\\"
            state = :escape_started
          when "{"
            open_group
          when "}"
            close_group
          else
            @fragment << c
          end

        when :escape_started
          case c
          when "\\", "{", "}"
            # Escaped literal; "\}" is handled like "\{" and "\\".
            @fragment << c
            state = :reading_fragment
          when "'"
            code = ""
            state = :reading_code
          else
            @cmd = c
            state = :reading_command
          end

        when :reading_command
          case c
          when " "
            command
            state = :reading_fragment
          when "\\"
            command
            state = :escape_started
          when /[a-z0-9]/
            @cmd << c
          when "{"
            command
            open_group
            state = :reading_fragment
          when "}"
            command
            close_group
            state = :reading_fragment
          else
            command
            @fragment << c
            state = :reading_fragment
          end

        when :reading_code
          code << c
          if code.length == 2
            @fragment << [code.to_i(16)].pack("U")
            state = :reading_fragment
          end
        end
      end

      # A command at the very end of the text has not been applied yet.
      command if state == :reading_command
      flush

      @result
    end

    protected

    # Character maps for the font commands, keyed by command name.
    CODE_PAGES = {
      "f1" => CodePage::F1_MAP,
      "f2" => CodePage::F2_MAP,
      "f3" => CodePage::F3_MAP,
      "f4" => CodePage::F4_MAP,
      "f6" => CodePage::F6_MAP,
      "f7" => CodePage::F7_MAP,
      "f8" => CodePage::F8_MAP,
      "f9" => CodePage::F9_MAP,
      "f10" => CodePage::F10_MAP,
      "f11" => CodePage::F11_MAP,
      "f12" => CodePage::F12_MAP,
      "f13" => CodePage::F13_MAP,
      "f16" => CodePage::F16_MAP,
    }

    # Ends the current fragment and starts a group that inherits a copy of
    # the current format.
    def open_group
      flush
      @format << @format.last.clone
    end

    # Ends the current fragment and leaves the current group. The outermost
    # format is never popped, so an unbalanced "}" cannot empty the stack
    # (entry "bum-bum" is broken in this way).
    def close_group
      flush
      @format.pop if @format.length > 1
    end

    # Returns +text+ with every character translated through code page +f+.
    # Under "f1", characters that have no mapping and lie outside
    # 0x80..0xa0 are kept as they are; any other unmapped character raises
    # RuntimeError with the page, the character and its code.
    def convert_encoding(text, f)
      s = ""
      cp = CODE_PAGES[f]
      text.each_char do |c|
        case
        when cp[c]
          s << cp[c]
        when f == "f1" && !(0x80..0xa0).include?(c.ord)
          s << c
        else
          raise "#{f}:#{c}:#{c.ord.to_s(16)}"
        end
      end
      s
    end

    # Appends the pending fragment to the result, paired with a snapshot of
    # the current format, and starts a new empty fragment.
    #
    # "caps" and "f" are consumed here (upcasing and code-page conversion)
    # and removed from the current format before the snapshot is taken, so
    # they never appear in the result.
    # NOTE(review): removing them from the live format means they stop
    # applying after the first flush; kept as in the original - confirm
    # that this is intended.
    def flush
      format = @format.last

      # This happens *once* ("mico-leão") in all of Houaiss.
      @fragment.upcase! if format["caps"]
      format.delete("caps")

      @fragment = convert_encoding(@fragment, format["f"] || "f1")
      format.delete("f")

      # Store a copy: the live hash keeps changing as later commands run,
      # and those changes must not leak into fragments already emitted.
      @result << [@fragment, format.dup]
      @fragment = ""
    end

    # Applies the command accumulated in @cmd to the current format or the
    # current fragment. Text pending before the command is flushed first so
    # that it keeps the format it was read under.
    def command
      flush if @fragment.length > 0

      case
      when @cmd =~ /f[0-9][0-9]?/ && @cmd != "f5"
        # Font selection; "f0" is treated as "f1".
        @cmd = "f1" if @cmd == "f0"
        @format.last["f"] = @cmd

      when ["lang1023", "lang1046", "ltrpar", "li100", "sa100", "sb100", "fs20", "fs22", "fs24"].include?(@cmd)
        # Accepted but without effect on the output.

      when @cmd == "ulnone" then @format.last.delete("ul")
      when @cmd == "i0" then @format.last.delete("i")
      when @cmd == "caps0" then @format.last.delete("caps")
      when @cmd == "strike0" then @format.last.delete("strike")
      when @cmd == "nosupersub" then @format.last.delete("super")
                                     @format.last.delete("sub")

      when @cmd == "bullet" then @fragment << "·"
      when @cmd == "lquote" then @fragment << "‘"
      when @cmd == "rquote" then @fragment << "’"
      when @cmd == "ldblquote" then @fragment << '“'
      when @cmd == "rdblquote" then @fragment << '”'

      else
        # Any other command name becomes a format flag.
        @format.last[@cmd] = true
      end

    end

  end
end
data/lib/epos.rb ADDED
@@ -0,0 +1,9 @@
1
+ require 'epos/data-file.rb'
2
+ require 'epos/dictionary.rb'
3
+ require 'epos/encoded-file.rb'
4
+ require 'epos/entry-parser.rb'
5
+ require 'epos/html-formatter.rb'
6
+ require 'epos/indexed-data-file.rb'
7
+ require 'epos/index-file.rb'
8
+ require 'epos/search.rb'
9
+ require 'epos/text-parser.rb'