epos 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,157 @@
1
+ # -*- coding: utf-8 -*-
2
module Epos

  # Turns the raw, line-oriented text of one entry into a nested Hash.
  #
  # The first character of every line is a code selecting the field to fill;
  # the remainder of the line, stripped of surrounding whitespace, is the value.
  class EntryParser

    # Codes whose value is stored as-is on the entry.
    ENTRY_TEXT_CODES = {
      "*" => :headword,
      "t" => :translation,
      "p" => :pronunciation,
      "d" => :first_use,
      "o" => :orthoepy,
      "$" => :old_spelling,
    }.freeze

    # Codes whose value is split on "|" and stored on the entry.
    ENTRY_LIST_CODES = {
      "L" => :language,
      "M" => :variants,
    }.freeze

    # Digit codes, each appending one extra tab of the given type.
    TAB_CODES = {
      "0" => :grammar,
      "1" => :grammar_usage,
      "2" => :usage,
      "3" => :etymology,
      "4" => :synonyms,
      "5" => :antonyms,
      "6" => :collective,
      "7" => :homonyms,
      "8" => :paronyms,
      "9" => :animals,
    }.freeze

    # Codes whose value is stored as-is on the current div.
    DIV_TEXT_CODES = {
      "#" => :idiom,
      "P" => :plural,
    }.freeze

    # Codes whose value is stored as-is on the current definition.
    DEFIN_TEXT_CODES = {
      "-" => :num,
      ":" => :text,
      "!" => :note,
    }.freeze

    # Codes whose value is split on "|" and stored on the current definition.
    DEFIN_LIST_CODES = {
      "r" => :transty,
      "<" => :examples,
    }.freeze

    # Codes whose "|"-split value goes to whichever of div/definition was
    # opened most recently.
    SHARED_LIST_CODES = {
      "D" => :derivation,
      "T" => :field,
      "U" => :register,
      "E" => :freq,
      "R" => :regional,
      "I" => :temporal,
    }.freeze

    # Codes that are accepted but produce nothing: "c" (hidden, apparently),
    # "S" and "v" (unknown), "\\" (coding error).
    IGNORED_CODES = ["c", "S", "v", "\\"].freeze

    private_constant :ENTRY_TEXT_CODES, :ENTRY_LIST_CODES, :TAB_CODES,
                     :DIV_TEXT_CODES, :DEFIN_TEXT_CODES, :DEFIN_LIST_CODES,
                     :SHARED_LIST_CODES, :IGNORED_CODES

    # Parses +text+ and returns the entry Hash.
    #
    # "B" opens a new div and "A" opens a new definition inside the current
    # div. Div- and definition-level codes that arrive before the matching
    # "B"/"A" fail with NoMethodError because there is nothing to assign to.
    #
    # Raises RuntimeError, with the offending code and value as the message,
    # for any code that is neither handled nor explicitly ignored.
    def parse(text)
      entry = blank_entry
      div = nil
      defin = nil
      target = nil # most recently opened div or definition

      text.lines.each do |line|
        code = line[0]
        suffix = line[1..-1].strip

        case code
        when "B"
          div = blank_div
          entry[:divs] << div
          target = div
          defin = nil
        when "A"
          defin = blank_defin(entry[:see])
          target = defin
          div[:defins] << defin
        when ">" then entry[:see] = true
        when "®" then entry[:trademark] = true
        when "n" then entry[:n] = suffix.to_i
        when "s" then div[:symbols] = true
        when "C" then div[:pos] = suffix.split("|")
        when *ENTRY_TEXT_CODES.keys then entry[ENTRY_TEXT_CODES[code]] = suffix
        when *ENTRY_LIST_CODES.keys then entry[ENTRY_LIST_CODES[code]] = suffix.split("|")
        when *TAB_CODES.keys then entry[:tabs] << {:type => TAB_CODES[code], :text => suffix}
        when *DIV_TEXT_CODES.keys then div[DIV_TEXT_CODES[code]] = suffix
        when *DEFIN_TEXT_CODES.keys then defin[DEFIN_TEXT_CODES[code]] = suffix
        when *DEFIN_LIST_CODES.keys then defin[DEFIN_LIST_CODES[code]] = suffix.split("|")
        when *SHARED_LIST_CODES.keys then target[SHARED_LIST_CODES[code]] = suffix.split("|")
        when *IGNORED_CODES
          # Recognised, deliberately dropped.
        else
          raise "#{code}#{suffix}"
        end
      end

      entry
    end

    private

    # Fresh entry Hash with every field at its default.
    def blank_entry
      {
        :headword => nil,
        :n => nil,
        :trademark => nil,
        :translation => nil,
        :pronunciation => nil,
        :language => nil,
        :first_use => nil,
        :orthoepy => nil,
        :variants => [],
        :old_spelling => nil,
        :see => false,
        :tabs => [],
        :divs => [],
      }
    end

    # Fresh div Hash with every field at its default.
    def blank_div
      {
        :pos => nil,
        :idiom => nil,
        :field => nil,
        :regional => nil,
        :register => nil,
        :temporal => nil,
        :freq => nil,
        :plural => nil,
        :symbols => nil,
        :derivation => nil,
        :defins => [],
      }
    end

    # Fresh definition Hash; +see+ is the entry's :see flag at the moment the
    # definition is opened.
    def blank_defin(see)
      {
        :num => nil,
        :text => nil,
        :transty => nil,
        :examples => [],
        :note => nil,
        :derivation => nil,
        :field => nil,
        :register => nil,
        :freq => nil,
        :regional => nil,
        :temporal => nil,
        :see => see,
      }
    end

  end

end
@@ -0,0 +1,14 @@
1
+ / Attribute-list partial. Walks the ATTRS1..ATTRS5 tables in order and, for
+ / each table, keeps the entries whose key has a truthy value in `data`.
+ / Entries are separated by '; '; a non-nil label is put in span.label; String
+ / and Array values go through render_text into a span whose class is
+ / 'value ' plus the key with '_' replaced by '-'. For an Array value the
+ / element at @string_index is the one rendered.
+ / NOTE(review): nesting is not visible in this listing - confirm against the template file.
+ - [ATTRS1, ATTRS2, ATTRS3, ATTRS4, ATTRS5].each do |attrs|
2
+ - existing = attrs.select{|name| data[name]}
3
+ - if existing.length > 0
4
+ p.attrs
5
+ - existing.each_with_index do |(name, label), index|
6
+ - if index > 0
7
+ = '; '
8
+ - if label
9
+ span.label = label
10
+ - if data[name].is_a?(String) || data[name].is_a?(Array)
11
+ - if label
12
+ = ' '
13
+ span class=('value ' + name.to_s.gsub(/_/, '-'))
14
+ == render_text(data[name].is_a?(String) ? data[name] : data[name][@string_index])
@@ -0,0 +1,8 @@
1
+ / Body of a single definition (`data` is one definition Hash). Emits the
+ / rendered data[:text] in p.defin, prefixed with an arrow when data[:see] is
+ / set, the :attrs partial for the same Hash, and one p.example per element of
+ / data[:examples].
+ / NOTE(review): nesting is not visible in this listing - confirm which lines sit inside the `if`.
+ - if data[:text]
2
+ p.defin
3
+ - if data[:see]
4
+ = "→ "
5
+ == render_text(data[:text])
6
+ == render(:attrs, data)
7
+ - data[:examples].each do |example|
8
+ p.example == render_text(example)
@@ -0,0 +1,8 @@
1
+ / List of definitions (`data` is an Array of definition Hashes). A single
+ / definition without :num is rendered directly through :defin_body; any other
+ / list becomes a table with one row per definition, the number in td.num and
+ / the :defin_body output in the second cell.
+ - if data.length == 1 && !data.first[:num]
2
+ == render(:defin_body, data.first)
3
+ - else
4
+ table
5
+ - data.each do |defin|
6
+ tr
7
+ td.num valign='top' = defin[:num]
8
+ td == render(:defin_body, defin)
@@ -0,0 +1,7 @@
1
+ / Top-level entry template (`data` is the entry Hash). Renders the headword,
+ / then three groups taken from data[:divs] - divs with neither :idiom nor
+ / :symbols, divs with :symbols, divs with :idiom - and finally one :extra_tab
+ / per element of data[:tabs].
+ div.epos
2
+ == render(:headword, data)
3
+ == render(:senses_tab, data[:divs].reject{|div| div[:idiom] || div[:symbols]})
4
+ == render(:symbols_tab, data[:divs].select{|div| div[:symbols]})
5
+ == render(:idioms_tab, data[:divs].select{|div| div[:idiom]})
6
+ - data[:tabs].each do |tab|
7
+ == render(:extra_tab, tab)
@@ -0,0 +1,3 @@
1
+ / One extra tab (`data` is a Hash with :type and :text). The heading text is
+ / looked up in TABS by data[:type]; the body is data[:text] passed through
+ / render_text.
+ - if data
2
+ h2 = TABS[data[:type]]
3
+ == render_text(data[:text])
@@ -0,0 +1,14 @@
1
+ / Headword heading (`data` is the entry Hash). Shows data[:n] in a sup when
+ / present and wraps the headword in i when data[:language] is set. Variants
+ / are appended as: every variant but the last prefixed with ", ", then OR
+ / surrounded by spaces, then the last variant. The :attrs partial is rendered
+ / for the entry as well.
+ h1
2
+ = '■ '
3
+ - if data[:n]
4
+ sup = data[:n]
5
+ - if data[:language]
6
+ i == render_text(data[:headword])
7
+ - else
8
+ == render_text(data[:headword])
9
+ - vars = data[:variants].map{|var| render_text(var)}
10
+ - if vars.size > 0
11
+ == vars[0..-2].map{|alt| ", #{alt}"}.join
12
+ == " #{OR} "
13
+ == vars.last
14
+ == render(:attrs, data)
@@ -0,0 +1,8 @@
1
+ / Idioms tab (`data` is an Array of divs). Nothing is emitted for an empty
+ / list. Each div gets an h3.idiom with its rendered :idiom text, in which
+ / every "@" is replaced by OR wrapped in a span.no-bold, followed by the
+ / div's :attrs and :defins partials.
+ - if data.length > 0
2
+ h2 = TABS[:idioms]
3
+ - data.each do |div|
4
+ h3.idiom
5
+ = '‣ '
6
+ == render_text(div[:idiom]).gsub(/@/, "<span class='no-bold'>#{OR}</span>")
7
+ == render(:attrs, div)
8
+ == render(:defins, div[:defins])
@@ -0,0 +1,8 @@
1
+ / Senses tab (`data` is an Array of divs). Nothing is emitted for an empty
+ / list; the h2 heading is left out when @unmarked is set. A div with :pos gets
+ / an h3 showing the :pos element at @string_index; the :attrs and :defins
+ / partials are rendered per div.
+ / NOTE(review): nesting is not visible in this listing - confirm which lines sit inside `if div[:pos]`.
+ - if data.length > 0
2
+ - if !@unmarked
3
+ h2 = TABS[:senses]
4
+ - data.each do |div|
5
+ - if div[:pos]
6
+ h3 = div[:pos][@string_index]
7
+ == render(:attrs, div)
8
+ == render(:defins, div[:defins])
@@ -0,0 +1,54 @@
1
/* Base typography for everything inside a rendered entry. */
.epos {
  font-family: "Noto Sans", "DejaVu Sans", sans-serif;
  color: #333333;
}

/* Headings: bold with no bottom margin; sizes step down from h1 to h3. */
.epos h1,
.epos h2,
.epos h3 {
  margin-bottom: 0;
  font-weight: bold;
}

.epos h1 {
  font-size: 120%;
}

.epos h2 {
  font-size: 110%;
}

/* h3 drops every margin, not only the bottom one. */
.epos h3 {
  margin: 0;
  font-size: 100%;
}

/* Definition table: tight cells, bold number column. */
.epos td {
  padding: 0;
}

.epos td.num {
  font-weight: bold;
  padding-right: 5px;
}

/* Paragraphs and usage examples. */
.epos p {
  margin: 0;
}

.epos p.example {
  color: #006600;
  font-style: italic;
}

/* Cancels the bold inherited from a heading. */
.epos .no-bold {
  font-weight: normal;
}

/* Attribute labels and values. */
.epos span.label {
  font-style: italic;
  color: #2e3e8c;
}

.epos span.value {
  color: #666666;
}

.epos span.value.transty {
  color: #aaaa55;
}

.epos span.value.note {
  color: #aa5555;
}
@@ -0,0 +1,4 @@
1
+ / Symbols tab (`data` is an Array of divs). Nothing is emitted for an empty
+ / list; otherwise the TABS[:symbols] heading is followed by the :defins
+ / partial of each div.
+ - if data.length > 0
2
+ h2 = TABS[:symbols]
3
+ - data.each do |div|
4
+ == render(:defins, div[:defins])
@@ -0,0 +1,109 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'slim'
3
+ require 'epos/text-parser.rb'
4
+
5
module Epos

  # Renders a parsed entry Hash as HTML through the Slim templates kept in the
  # "html" directory next to this file.
  class HtmlFormatter

    # Format name (as produced by the text parser) => HTML tag description.
    FORMATS = {
      "i" => {:name => "i"},
      "b" => {:name => "b"},
      "super" => {:name => "sup"},
      "sub" => {:name => "sub"},
      "ul" => {:name => "span", :style => "text-decoration: underline;"},
      "f5" => {:name => "span", :style => "font-variant: small-caps;"},
      "strike" => {:name => "span", :style => "text-decoration: line-through;"},
    }

    # Attribute tables consumed by the attrs template: key => label, where a
    # nil label means the value is shown without one.
    ATTRS1 = {
      :field => "rubrica",
      :regional => "regionalismo",
      :register => "uso",
      :temporal => "diacronismo",
      :freq => "estatística",
      :derivation => "derivação",
    }
    ATTRS2 = {
      :note => nil,
    }
    ATTRS3 = {
      :transty => nil,
    }
    ATTRS4 = {
      :old_spelling => "forma antiga",
      :trademark => "marca registrada",
      :language => "língua",
      :translation => "tradução",
      :pronunciation => "pronúncia",
      :first_use => "datação",
      :orthoepy => "ortoépia",
    }
    ATTRS5 = {
      :plural => "plural",
    }

    # Tab type => heading text.
    TABS = {
      :senses => "acepções",
      :idioms => "locuções",
      :symbols => "símbolos e abreviações",
      :grammar => "gramática",
      :grammar_usage => "gramática e uso",
      :usage => "uso",
      :etymology => "etimologia",
      :synonyms => "sinônimos",
      :antonyms => "antônimos",
      :collective => "coletivos",
      :homonyms => "homônimos",
      :paronyms => "parônimos",
      :animals => "vozes de animais",
    }

    # Conjunction placed between alternatives.
    OR = "ou"

    # unmarked: when true the senses template leaves out its heading.
    # compact:  selects index -2 instead of -1 when a template picks one
    #           element out of an Array-valued attribute.
    def initialize(unmarked: false, compact: false)
      @unmarked = unmarked
      @string_index = if compact then -2 else -1 end
      @parser = TextParser.new
      @html_base = File.join(File.dirname(__FILE__), "html")

      load_templates
    end

    # Returns the HTML for one parsed entry.
    def format(entry)
      render(:entry, entry)
    end

    # Returns the contents of the stylesheet shipped with the templates.
    def style
      stylesheet = File.join(@html_base, "style.css")
      File.read(stylesheet)
    end

    protected

    # Compiles every template once; file names use "-" where the template
    # symbol uses "_".
    def load_templates
      engine = Tilt["slim"]
      names = [:extra_tab, :symbols_tab, :idioms_tab, :senses_tab, :defins, :defin_body, :attrs, :headword, :entry]

      @templates = names.each_with_object({}) do |name, compiled|
        file = name.to_s.tr("_", "-") + ".slim"
        compiled[name] = engine.new(File.join(@html_base, file))
      end
    end

    # Renders template +item+ with this formatter as scope and +data+ exposed
    # to the template as the local `data`.
    def render(item, data)
      template = @templates[item]
      template.render(self, :data => data)
    end

    # Wraps +fragment+ in one tag per key of +format+, opened in key order and
    # closed in reverse. Raises (bare RuntimeError) when a key is not in FORMATS.
    def render_fragment(fragment, format)
      tags = format.keys.map { |name| FORMATS[name] }
      raise if tags.include?(nil)

      opening = tags.map do |tag|
        style = tag[:style] ? " style='#{tag[:style]}'" : ""
        "<#{tag[:name]}#{style}>"
      end
      closing = tags.reverse.map { |tag| "</#{tag[:name]}>" }

      opening.join + fragment + closing.join
    end

    # Parses +text+ into fragments and concatenates their HTML.
    def render_text(text)
      pieces = @parser.parse(text).map do |fragment, format|
        render_fragment(fragment, format)
      end
      pieces.join
    end

  end
end
@@ -0,0 +1,35 @@
1
+ require 'epos/encoded-file.rb'
2
+
3
module Epos

  # In-memory index built from a file of "key;value" lines. A key may appear on
  # several lines; its integer values are collected in file order.
  class IndexFile

    # Reads the file at +path+ through EncodedFile.read and builds the index.
    #
    # Blank lines are skipped. The value is taken from everything after the
    # first ";" regardless of whether the line ends in a newline, so the last
    # line of a file without a trailing newline keeps all its digits.
    #
    # Raises ArgumentError for a non-blank line that has no ";" separator.
    def initialize(path)
      @index = {}
      EncodedFile.read(path).each_line { |line| add_line(line) }
    end

    # Returns the values recorded for +key+, or an empty Array if there are none.
    def look_up(key)
      @index[key] || []
    end

    # Returns every key in the index.
    def keys
      @index.keys
    end

    # True when +key+ occurs in the index.
    def has_key?(key)
      @index.has_key?(key)
    end

    private

    # Adds one "key;value" line to the index.
    def add_line(line)
      record = line.chomp
      return if record.empty?

      # partition splits on the first ";" and copes with an empty key.
      key, separator, value = record.partition(";")
      raise ArgumentError, "index line without ';' separator: #{line.inspect}" if separator.empty?

      (@index[key] ||= []) << value.to_i
    end

  end
end
@@ -0,0 +1,25 @@
1
+ require 'epos/index-file.rb'
2
+ require 'epos/data-file.rb'
3
+
4
module Epos

  # Pairs an IndexFile with a DataFile so that entries can be fetched by key.
  class IndexedDataFile

    # Opens the index at +index_path+ and the data file at +data_path+.
    def initialize(index_path, data_path)
      @index = IndexFile.new(index_path)
      @data = DataFile.new(data_path)
    end

    # Returns one entry, read from the data file, for every value the index
    # holds for +key+; an unknown key yields whatever the index returns for it,
    # mapped the same way.
    def look_up(key)
      positions = @index.look_up(key)
      positions.map { |position| @data.read_entry(position) }
    end

    # Returns all keys known to the index.
    def keys
      @index.keys
    end

    # True when the index contains +key+.
    def has_key?(key)
      @index.has_key?(key)
    end

  end
end
@@ -0,0 +1,7 @@
1
module Epos

  class << self

    # Placeholder: accepts a search string, performs no work and returns nil.
    def search(string)
      nil
    end

  end

end
@@ -0,0 +1,184 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'epos/cp/f1.rb'
3
+ require 'epos/cp/f2.rb'
4
+ require 'epos/cp/f3.rb'
5
+ require 'epos/cp/f4.rb'
6
+ require 'epos/cp/f6.rb'
7
+ require 'epos/cp/f7.rb'
8
+ require 'epos/cp/f8.rb'
9
+ require 'epos/cp/f9.rb'
10
+ require 'epos/cp/f10.rb'
11
+ require 'epos/cp/f11.rb'
12
+ require 'epos/cp/f12.rb'
13
+ require 'epos/cp/f13.rb'
14
+ require 'epos/cp/f16.rb'
15
+
16
module Epos

  # Splits marked-up text into formatted fragments.
  #
  # The markup uses backslash control words, "{" / "}" groups and \'hh hex
  # escapes (the control-word names resemble RTF). The parser is a
  # character-driven state machine with four states: reading plain text, just
  # after a backslash, inside a control word, and inside a two-digit hex code.
  class TextParser

    # Parses +text+ and returns an Array of [fragment, format] pairs, where
    # +fragment+ is the text converted through its code page and +format+ is a
    # Hash whose keys are the formatting names active for that fragment.
    #
    # Each pair carries its own copy of the format, so control words met later
    # in the text never alter fragments that were already emitted.
    def parse(text)
      @result = []
      @format = [{}]
      @fragment = ""
      @cmd = ""

      s = :reading_fragment
      code = ""

      text.each_char do |c|

        case s

        when :reading_fragment
          case c
          when "\\"
            s = :escape_started
          when "{"
            open_group
          when "}"
            close_group
          else
            @fragment << c
          end

        when :escape_started
          case c
          when "\\", "{", "}"
            # Escaped backslash or brace: literal character. "}" is handled
            # the same way as "{" instead of starting a control word.
            @fragment << c
            s = :reading_fragment
          when "'"
            code = ""
            s = :reading_code
          else
            @cmd = c
            s = :reading_command
          end

        when :reading_command
          case c
          when " "
            # A single space terminates the control word and is swallowed.
            self.command
            s = :reading_fragment
          when "\\"
            self.command
            s = :escape_started
          when /[a-z0-9]/
            @cmd << c
          when "{"
            self.command
            open_group
            s = :reading_fragment
          when "}"
            self.command
            close_group
            s = :reading_fragment
          else
            self.command
            @fragment << c
            s = :reading_fragment
          end

        when :reading_code
          code << c
          if code.length == 2
            # Two hex digits name a single character; it is mapped through the
            # active code page when the fragment is flushed.
            @fragment << [code.to_i(16)].pack("U")
            s = :reading_fragment
          end
        end
      end

      self.command if s == :reading_command
      self.flush

      return @result

    end

    protected

    # Font control word => character map used by convert_encoding.
    CODE_PAGES = {
      "f1" => CodePage::F1_MAP,
      "f2" => CodePage::F2_MAP,
      "f3" => CodePage::F3_MAP,
      "f4" => CodePage::F4_MAP,
      "f6" => CodePage::F6_MAP,
      "f7" => CodePage::F7_MAP,
      "f8" => CodePage::F8_MAP,
      "f9" => CodePage::F9_MAP,
      "f10" => CodePage::F10_MAP,
      "f11" => CodePage::F11_MAP,
      "f12" => CodePage::F12_MAP,
      "f13" => CodePage::F13_MAP,
      "f16" => CodePage::F16_MAP,
    }

    # Emits the pending fragment and starts a group whose format begins as a
    # copy of the enclosing one.
    def open_group
      self.flush
      @format << @format.last.clone
    end

    # Emits the pending fragment and leaves the current group. The outermost
    # format is never popped, so an unbalanced "}" cannot empty the stack
    # (entry "bum-bum" is broken in exactly this way).
    def close_group
      self.flush
      @format.pop if @format.length > 1
    end

    # Maps every character of +text+ through code page +f+. Under "f1",
    # characters without a mapping pass through unchanged unless they fall in
    # 0x80..0xa0; any other unmapped character raises RuntimeError with
    # "page:char:hex".
    def convert_encoding(text, f)
      s = ""
      cp = CODE_PAGES[f]
      text.each_char do |c|
        case
        when cp[c]
          s << cp[c]
        when f == "f1" && !(0x80..0xa0).include?(c.ord)
          s << c
        else
          raise "#{f}:#{c}:#{c.ord.to_s(16)}"
        end
      end
      s
    end

    # Appends the pending fragment to the result and resets it.
    #
    # "caps" and "f" are consumed here: they are applied to the fragment and
    # then removed from the live format, so they never reach the result.
    def flush
      format = @format.last

      # This happens *once* ("mico-leão") in all of Houaiss.
      @fragment.upcase! if format["caps"]
      format.delete("caps")

      @fragment = self.convert_encoding(@fragment, format["f"] || "f1")
      format.delete("f")

      # Store a snapshot: the live hash keeps being mutated by later control
      # words and must not be shared with fragments already emitted.
      @result << [@fragment, format.dup]
      @fragment = ""
    end

    # Applies the control word collected in @cmd to the current format, after
    # flushing any text that preceded it.
    def command
      self.flush if @fragment.length > 0

      case
      when @cmd =~ /f[0-9][0-9]?/ && @cmd != "f5"
        # Font switch selects a code page; "f0" is treated as "f1". "f5" is
        # excluded here and falls through to the generic formatting flag.
        @cmd = "f1" if @cmd == "f0"
        @format.last["f"] = @cmd

      when ["lang1023", "lang1046", "ltrpar", "li100", "sa100", "sb100", "fs20", "fs22", "fs24"].include?(@cmd)
        # Control words with no effect on the output; deliberately ignored.

      when @cmd == "ulnone"     then @format.last.delete("ul")
      when @cmd == "i0"         then @format.last.delete("i")
      when @cmd == "caps0"      then @format.last.delete("caps")
      when @cmd == "strike0"    then @format.last.delete("strike")
      when @cmd == "nosupersub" then @format.last.delete("super")
                                     @format.last.delete("sub")

      when @cmd == "bullet"    then @fragment << "·"
      when @cmd == "lquote"    then @fragment << "‘"
      when @cmd == "rquote"    then @fragment << "’"
      when @cmd == "ldblquote" then @fragment << '“'
      when @cmd == "rdblquote" then @fragment << '”'

      else
        # Any other control word becomes a formatting flag.
        @format.last[@cmd] = true
      end

    end

  end
end
data/lib/epos.rb ADDED
@@ -0,0 +1,9 @@
1
+ require 'epos/data-file.rb'
2
+ require 'epos/dictionary.rb'
3
+ require 'epos/encoded-file.rb'
4
+ require 'epos/entry-parser.rb'
5
+ require 'epos/html-formatter.rb'
6
+ require 'epos/indexed-data-file.rb'
7
+ require 'epos/index-file.rb'
8
+ require 'epos/search.rb'
9
+ require 'epos/text-parser.rb'