epos 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +674 -0
- data/README.md +45 -0
- data/README.pt-br.md +43 -0
- data/epico.png +0 -0
- data/epos.gemspec +20 -0
- data/examples/web.rb +9 -0
- data/lib/epos/cp/f1.rb +14 -0
- data/lib/epos/cp/f10.rb +18 -0
- data/lib/epos/cp/f11.rb +51 -0
- data/lib/epos/cp/f12.rb +19 -0
- data/lib/epos/cp/f13.rb +21 -0
- data/lib/epos/cp/f16.rb +8 -0
- data/lib/epos/cp/f2.rb +55 -0
- data/lib/epos/cp/f3.rb +19 -0
- data/lib/epos/cp/f4.rb +19 -0
- data/lib/epos/cp/f6.rb +34 -0
- data/lib/epos/cp/f7.rb +19 -0
- data/lib/epos/cp/f8.rb +29 -0
- data/lib/epos/cp/f9.rb +14 -0
- data/lib/epos/data-file.rb +28 -0
- data/lib/epos/dictionary.rb +104 -0
- data/lib/epos/encoded-file.rb +38 -0
- data/lib/epos/entry-parser.rb +157 -0
- data/lib/epos/html/attrs.slim +14 -0
- data/lib/epos/html/defin-body.slim +8 -0
- data/lib/epos/html/defins.slim +8 -0
- data/lib/epos/html/entry.slim +7 -0
- data/lib/epos/html/extra-tab.slim +3 -0
- data/lib/epos/html/headword.slim +14 -0
- data/lib/epos/html/idioms-tab.slim +8 -0
- data/lib/epos/html/senses-tab.slim +8 -0
- data/lib/epos/html/style.css +54 -0
- data/lib/epos/html/symbols-tab.slim +4 -0
- data/lib/epos/html-formatter.rb +109 -0
- data/lib/epos/index-file.rb +35 -0
- data/lib/epos/indexed-data-file.rb +25 -0
- data/lib/epos/search.rb +7 -0
- data/lib/epos/text-parser.rb +184 -0
- data/lib/epos.rb +9 -0
- metadata +97 -0
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
Epos
|
2
|
+
====
|
3
|
+
|
4
|
+
Epos, from the Greek ἔπος (“word”), is a Ruby gem that gives you access to the
|
5
|
+
data files of the excellent
|
6
|
+
[Houaiss dictionary](http://www.objetiva.com.br/livro_ficha.php?id=707).
|
7
|
+
|
8
|
+
Please note that you must
|
9
|
+
[buy the dictionary](http://www.objetiva.com.br/livro_ficha.php?id=707) in order
|
10
|
+
to use this gem.
|
11
|
+
|
12
|
+
Features
|
13
|
+
--------
|
14
|
+
|
15
|
+
* Entries can be retrieved as HTML.
|
16
|
+
* Both the main dictionary and the morpheme one are supported.
|
17
|
+
* There's a parser for the structure used by entries.
|
18
|
+
* There's a parser for the RTF-like format used by the text.
|
19
|
+
* Characters are rendered correctly, even those in the etymology section or in
|
20
|
+
the pronunciation of foreign words.
|
21
|
+
|
22
|
+
Example
|
23
|
+
-------
|
24
|
+
|
25
|
+
The following example looks up the word *épico* and saves it to a file:
|
26
|
+
|
27
|
+
require 'epos'
|
28
|
+
dict = Epos::Dictionary.new("/home/user/houaiss")
|
29
|
+
File.write("épico.html", dict.look_up("épico"))
|
30
|
+
|
31
|
+
This is what it should look like:
|
32
|
+
|
33
|
+

|
34
|
+
|
35
|
+
Limitations
|
36
|
+
-----------
|
37
|
+
|
38
|
+
* No support for animal sounds, verb conjugation, etc.
|
39
|
+
* No nice interface.
|
40
|
+
|
41
|
+
Compatibility
|
42
|
+
-------------
|
43
|
+
|
44
|
+
I know it works with version 2009.6 (July 2013). I don't know about any other
|
45
|
+
version.
|
data/README.pt-br.md
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
Epos
|
2
|
+
====
|
3
|
+
|
4
|
+
Epos, do grego ἔπος (“palavra”), é uma gem de Ruby que dá acesso aos arquivos de
|
5
|
+
dados do excelente
|
6
|
+
[dicionário Houaiss eletrônico da língua portuguesa](http://www.objetiva.com.br/livro_ficha.php?id=707).
|
7
|
+
|
8
|
+
Note que você precisa
|
9
|
+
[comprar o dicionário](http://www.objetiva.com.br/livro_ficha.php?id=707) para
|
10
|
+
usar esta gem.
|
11
|
+
|
12
|
+
Funcionalidades
|
13
|
+
---------------
|
14
|
+
|
15
|
+
* Os verbetes podem ser convertidos para HTML.
|
16
|
+
* Há um analisador para a estrutura usada nos verbetes.
|
17
|
+
* Há um analisador para o formato do texto, que é semelhante ao RTF.
|
18
|
+
* Os caracteres aparecem corretamente, mesmo aqueles na parte de etimologia e na
|
19
|
+
pronúncia de palavras estrangeiras.
|
20
|
+
|
21
|
+
Exemplo
|
22
|
+
-------
|
23
|
+
|
24
|
+
O seguinte exemplo busca a definição da palavra *épico* e a salva num arquivo:
|
25
|
+
|
26
|
+
require 'epos'
|
27
|
+
dict = Epos::Dictionary.new("/home/user/houaiss")
|
28
|
+
File.write("épico.html", dict.look_up("épico"))
|
29
|
+
|
30
|
+
Deve gerar algo assim:
|
31
|
+
|
32
|
+

|
33
|
+
|
34
|
+
Limitações
|
35
|
+
----------
|
36
|
+
|
37
|
+
* Não há suporte às vozes de animais, à conjugação verbal, etc.
|
38
|
+
* Não há uma interface amigável.
|
39
|
+
|
40
|
+
Compatibilidade
|
41
|
+
---------------
|
42
|
+
|
43
|
+
Sei que funciona com a versão 2009.6 (Julho de 2013). Não sei sobre as outras.
|
data/epico.png
ADDED
Binary file
|
data/epos.gemspec
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
Gem::Specification.new do |s|
|
3
|
+
s.name = 'epos'
|
4
|
+
s.version = '0.9.0'
|
5
|
+
s.date = '2014-09-05'
|
6
|
+
s.summary = "Access to the Houaiss dictionary data files"
|
7
|
+
s.description = "Provides access to the Houaiss dictionary data files. Entries can be exported as HTML."
|
8
|
+
s.authors = ["Adiel Mittmann"]
|
9
|
+
s.email = 'adiel@mittmann.net.br'
|
10
|
+
s.homepage = 'https://github.com/adiel-mittmann/epos'
|
11
|
+
s.license = 'GPL3'
|
12
|
+
|
13
|
+
s.files = %w(LICENSE README.md README.pt-br.md epico.png epos.gemspec) +
|
14
|
+
Dir["lib/**/*.rb"] +
|
15
|
+
Dir["lib/**/*.slim"] +
|
16
|
+
Dir["lib/**/*.css"] +
|
17
|
+
Dir["examples/**/*.rb"]
|
18
|
+
|
19
|
+
s.add_dependency 'slim', '>= 2.0.0'
|
20
|
+
end
|
data/examples/web.rb
ADDED
data/lib/epos/cp/f1.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
|
3
|
+
class CodePage
|
4
|
+
F1_MAP = {
|
5
|
+
"¬" => "-́",
|
6
|
+
"\u0096" => "—",
|
7
|
+
"\u0097" => "—",
|
8
|
+
"\u0086" => "✝",
|
9
|
+
"\u0093" => "“",
|
10
|
+
"\u0094" => "”",
|
11
|
+
"\u009c" => "œ", # A mistake, should be f6, e.g. "causeur".
|
12
|
+
}
|
13
|
+
end
|
14
|
+
end
|
data/lib/epos/cp/f10.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
|
3
|
+
class CodePage
|
4
|
+
F10_MAP = {
|
5
|
+
"t" => "ṭ",
|
6
|
+
"h" => "ḥ",
|
7
|
+
"s" => "ṣ",
|
8
|
+
"H" => "Ḥ",
|
9
|
+
"d" => "ḍ",
|
10
|
+
"r" => "ṛ",
|
11
|
+
"S" => "Ṣ",
|
12
|
+
"n" => "ṇ",
|
13
|
+
"z" => "ẓ",
|
14
|
+
"m" => "ṃ",
|
15
|
+
"l" => "ḷ",
|
16
|
+
}
|
17
|
+
end
|
18
|
+
end
|
data/lib/epos/cp/f11.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
|
3
|
+
class CodePage
|
4
|
+
F11_MAP = {
|
5
|
+
"®" => "→",
|
6
|
+
"¾" => "—",
|
7
|
+
"a" => "α",
|
8
|
+
"b" => "β",
|
9
|
+
"c" => "χ",
|
10
|
+
"d" => "δ",
|
11
|
+
"Ç" => "∩",
|
12
|
+
"D" => "Δ",
|
13
|
+
"e" => "ε",
|
14
|
+
"È" => "∪",
|
15
|
+
"f" => "φ",
|
16
|
+
"F" => "Φ",
|
17
|
+
"g" => "γ",
|
18
|
+
"G" => "Γ",
|
19
|
+
"h" => "η",
|
20
|
+
"i" => "ι",
|
21
|
+
"k" => "κ",
|
22
|
+
"l" => "λ",
|
23
|
+
"m" => "μ",
|
24
|
+
"Ñ" => "∇",
|
25
|
+
"o" => "ο",
|
26
|
+
"ò" => "ʃ",
|
27
|
+
"Ö" => "√",
|
28
|
+
"º" => "≡",
|
29
|
+
"p" => "π",
|
30
|
+
"q" => "θ",
|
31
|
+
"Q" => "Θ",
|
32
|
+
"r" => "ρ",
|
33
|
+
"s" => "σ",
|
34
|
+
"S" => "Σ",
|
35
|
+
"t" => "τ",
|
36
|
+
"u" => "υ",
|
37
|
+
"U" => "Υ",
|
38
|
+
"V" => "ζ", # "dzeta". Houaiss shows "ς".
|
39
|
+
"w" => "ω",
|
40
|
+
"W" => "Ω",
|
41
|
+
"x" => "ξ",
|
42
|
+
"X" => "Ξ",
|
43
|
+
"y" => "ψ",
|
44
|
+
"Y" => "Ψ",
|
45
|
+
"." => ".",
|
46
|
+
"L" => "Λ",
|
47
|
+
"n" => "ν",
|
48
|
+
"P" => "Π",
|
49
|
+
}
|
50
|
+
end
|
51
|
+
end
|
data/lib/epos/cp/f12.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
|
3
|
+
class CodePage
|
4
|
+
F12_MAP = {
|
5
|
+
"¿" => "♭",
|
6
|
+
"Æ" => "𝅘𝅥𝅲",
|
7
|
+
"x" => "𝅘𝅥𝅯",
|
8
|
+
"w" => "𝅝",
|
9
|
+
"r" => "𝅘𝅥𝅰",
|
10
|
+
"q" => "𝅘𝅥",
|
11
|
+
"k" => "♮",
|
12
|
+
"h" => "𝅗𝅥",
|
13
|
+
"e" => "𝅘𝅥𝅮",
|
14
|
+
"B" => "𝄡",
|
15
|
+
"?" => "𝄢",
|
16
|
+
"&" => "𝄞",
|
17
|
+
}
|
18
|
+
end
|
19
|
+
end
|
data/lib/epos/cp/f13.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
|
3
|
+
class CodePage
|
4
|
+
F13_MAP = {
|
5
|
+
"ì" => "ĭ",
|
6
|
+
"á" => "ā",
|
7
|
+
"í" => "ī",
|
8
|
+
"ó" => "ō",
|
9
|
+
"è" => "ĕ",
|
10
|
+
"ý" => "ȳ",
|
11
|
+
"ù" => "ŭ",
|
12
|
+
"é" => "ē",
|
13
|
+
"ú" => "ū",
|
14
|
+
"ò" => "ŏ",
|
15
|
+
"à" => "ă",
|
16
|
+
"ø" => "ø",
|
17
|
+
"u" => "u",
|
18
|
+
"i" => "i",
|
19
|
+
}
|
20
|
+
end
|
21
|
+
end
|
data/lib/epos/cp/f16.rb
ADDED
data/lib/epos/cp/f2.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
|
3
|
+
class CodePage
|
4
|
+
F2_MAP = {
|
5
|
+
"Á" => "Ā",
|
6
|
+
"Ó" => "Ō",
|
7
|
+
"á" => "ā",
|
8
|
+
"é" => "ē",
|
9
|
+
"í" => "ī",
|
10
|
+
"ó" => "ō",
|
11
|
+
"ú" => "ū",
|
12
|
+
"ý" => "ȳ",
|
13
|
+
"à" => "ă",
|
14
|
+
"è" => "ĕ",
|
15
|
+
"ì" => "ĭ",
|
16
|
+
"ò" => "ŏ",
|
17
|
+
"ù" => "ŭ",
|
18
|
+
"þ" => "y̆",
|
19
|
+
"â" => "ā́",
|
20
|
+
"ê" => "ḗ",
|
21
|
+
"ô" => "ṓ",
|
22
|
+
"î" => "ī́",
|
23
|
+
"ë" => "ẹ",
|
24
|
+
"ö" => "ọ",
|
25
|
+
"ü" => "ụ",
|
26
|
+
"k" => "k",
|
27
|
+
"h" => "h",
|
28
|
+
"d" => "d",
|
29
|
+
"n" => "n",
|
30
|
+
"o" => "o",
|
31
|
+
"c" => "c",
|
32
|
+
"e" => "e",
|
33
|
+
"m" => "m",
|
34
|
+
"i" => "i",
|
35
|
+
"r" => "r",
|
36
|
+
"l" => "l",
|
37
|
+
"u" => "u",
|
38
|
+
"s" => "s",
|
39
|
+
"a" => "a",
|
40
|
+
"i" => "i",
|
41
|
+
"t" => "t",
|
42
|
+
"g" => "g",
|
43
|
+
"y" => "y",
|
44
|
+
">" => ">",
|
45
|
+
"'" => "'",
|
46
|
+
"," => ",",
|
47
|
+
"-" => "-",
|
48
|
+
" " => " ",
|
49
|
+
")" => ")",
|
50
|
+
"\u0096" => "—",
|
51
|
+
"\u0097" => "—",
|
52
|
+
"¬" => "-́",
|
53
|
+
}
|
54
|
+
end
|
55
|
+
end
|
data/lib/epos/cp/f3.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
|
3
|
+
class CodePage
|
4
|
+
F3_MAP = {
|
5
|
+
"\u0096" => "-́",
|
6
|
+
"g" => "ĝ",
|
7
|
+
"e" => "ẽ",
|
8
|
+
"i" => "ĩ",
|
9
|
+
"u" => "ũ",
|
10
|
+
"¬" => "-",
|
11
|
+
"-" => "-",
|
12
|
+
"j" => "j̈",
|
13
|
+
"s" => "s",
|
14
|
+
"y" => "ỹ",
|
15
|
+
" " => " ",
|
16
|
+
"k" => "ǩ",
|
17
|
+
}
|
18
|
+
end
|
19
|
+
end
|
data/lib/epos/cp/f4.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
|
3
|
+
# "y" is used in two entries: "evonim-" and "mormir(o)-". In both cases, it
|
4
|
+
# is shown as a "y" with something weird below. In the first entry, it is
|
5
|
+
# used to represent the *short* "y" in "euonymus"; in the second, the *long*
|
6
|
+
# "y" in "mormyr". So...
|
7
|
+
class CodePage
|
8
|
+
F4_MAP = {
|
9
|
+
"H" => "ʰ",
|
10
|
+
"N" => "ⁿ",
|
11
|
+
"T" => "ᵗ",
|
12
|
+
"S" => "ˢ",
|
13
|
+
"J" => "ʲ",
|
14
|
+
"ï" => "ï",
|
15
|
+
"D" => "ᵈ",
|
16
|
+
"y" => "y",
|
17
|
+
}
|
18
|
+
end
|
19
|
+
end
|
data/lib/epos/cp/f6.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
|
3
|
+
class CodePage
|
4
|
+
F6_MAP = {
|
5
|
+
"«" => "ə",
|
6
|
+
'"' => "ɪ",
|
7
|
+
"E" => "ɛ",
|
8
|
+
"S" => "ʃ",
|
9
|
+
"\u0086" => "ɔ",
|
10
|
+
"A" => "ɑ",
|
11
|
+
"N" => "ŋ",
|
12
|
+
"Z" => "ʒ",
|
13
|
+
"Ã" => "ʌ",
|
14
|
+
"¡" => "ɑ̃",
|
15
|
+
"Î" => "ɜ",
|
16
|
+
"é" => "ɛ̃",
|
17
|
+
"U" => "ʊ",
|
18
|
+
"\u009c" => "œ",
|
19
|
+
"ø" => "ɲ",
|
20
|
+
"Ö" => "ʕ",
|
21
|
+
"ç" => "ɥ",
|
22
|
+
"|" => "ɫ",
|
23
|
+
"ö" => "ɨ",
|
24
|
+
"¢" => "ɔ̃",
|
25
|
+
"´" => "ʎ",
|
26
|
+
"l" => "ḷ",
|
27
|
+
"â" => "a͡a",
|
28
|
+
"¸" => "ɸ",
|
29
|
+
"D" => "ð",
|
30
|
+
"\u008a" => "ș",
|
31
|
+
"t" => "ț",
|
32
|
+
}
|
33
|
+
end
|
34
|
+
end
|
data/lib/epos/cp/f7.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
|
3
|
+
class CodePage
|
4
|
+
F7_MAP = {
|
5
|
+
"r" => "ɹ",
|
6
|
+
"e" => "ẹ",
|
7
|
+
"·" => "ẹ",
|
8
|
+
"\u0096" => "o̧",
|
9
|
+
"¢" => "ɔ̃",
|
10
|
+
"m" => "ɯ",
|
11
|
+
"¡" => "ɑ̃",
|
12
|
+
"ø" => "ɲ",
|
13
|
+
"«" => "ə",
|
14
|
+
"w" => "ẅ",
|
15
|
+
"M" => ">", # "antilambda", but see http://en.wikipedia.org/wiki/Diple_(textual_symbol)
|
16
|
+
"A" => "ɑ",
|
17
|
+
}
|
18
|
+
end
|
19
|
+
end
|
data/lib/epos/cp/f8.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
module Epos
|
3
|
+
class CodePage
|
4
|
+
# Characters in this page produce a line above some of the characters that
|
5
|
+
# precede them. Their purpose is to indicate the pitch in Japanese words.
|
6
|
+
# Lowercase letters indicate that the pitch remains the same, uppercase that
|
7
|
+
# it drops. The closer a letter is to the beginning of the alphabet, the
|
8
|
+
# shorter the line that it produces.
|
9
|
+
#
|
10
|
+
# The length of the line does not match well letter boundaries. It's
|
11
|
+
# difficult to establish how many letters should be affected.
|
12
|
+
F8_MAP = {
|
13
|
+
"c" => "",
|
14
|
+
"d" => "",
|
15
|
+
"e" => "",
|
16
|
+
"f" => "",
|
17
|
+
"h" => "",
|
18
|
+
"j" => "",
|
19
|
+
|
20
|
+
"A" => "˺",
|
21
|
+
"C" => "˺",
|
22
|
+
"D" => "˺",
|
23
|
+
"E" => "˺",
|
24
|
+
"F" => "˺",
|
25
|
+
"G" => "˺",
|
26
|
+
"H" => "˺",
|
27
|
+
}
|
28
|
+
end
|
29
|
+
end
|
data/lib/epos/cp/f9.rb
ADDED
data/lib/epos/data-file.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'epos/encoded-file.rb'
|
2
|
+
|
3
|
+
module Epos

  # Reads individual entries out of a data file that is accessed through
  # EncodedFile.
  class DataFile

    # @param path [String] location of the data file; handed to EncodedFile.new
    def initialize(path)
      @file = EncodedFile.new(path)
    end

    # Returns the text of the entry located at +pos+.
    #
    # Reading starts one position past +pos+ and the returned text always
    # begins with a "*". Text is accumulated in 1024-unit chunks until a line
    # starting with "*" shows up (the returned text then ends with the newline
    # that precedes it) or until the file is exhausted.
    #
    # @param pos [Integer] offset of the entry inside the file
    # @return [String] the entry text
    def read_entry(pos)
      @file.seek(pos + 1)
      entry = "*".dup

      until @file.eof?
        chunk = @file.read(1024)
        break if chunk.nil? || chunk.empty?

        entry << chunk
        boundary = entry.index("\n*")
        next unless boundary

        # Keep everything up to and including the newline before the next "*".
        entry = entry[0..boundary]
        break
      end

      entry
    end

  end
end
|
data/lib/epos/dictionary.rb
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'epos/indexed-data-file.rb'
|
3
|
+
require 'epos/html-formatter.rb'
|
4
|
+
require 'epos/entry-parser.rb'
|
5
|
+
|
6
|
+
module Epos
  # Looks words up in two indexed data files and renders the matching entries
  # as HTML.
  class Dictionary

    # Opens the indexed data files found under +path+ and builds the index
    # used for fuzzy matching.
    #
    # NOTE(review): the README mentions a main dictionary and a morpheme one;
    # presumably the first file pair is the former and the second the latter --
    # confirm against IndexedDataFile.
    #
    # @param path [String] directory that holds the "deah00*.dhn" files
    def initialize(path)
      @idf1 = Epos::IndexedDataFile.new(File.join(path, "deah002.dhn"), File.join(path, "deah001.dhn"))
      @idf2 = Epos::IndexedDataFile.new(File.join(path, "deah008.dhn"), File.join(path, "deah007.dhn"))

      @fmt1 = Epos::HtmlFormatter.new(unmarked: false)
      @fmt2 = Epos::HtmlFormatter.new(unmarked: true)

      @entry_parser = Epos::EntryParser.new

      # Simplified key => sorted, de-duplicated list of the real keys that
      # collapse to it.
      @fuzzy = (@idf1.keys + @idf2.keys).group_by { |key| simplify(key) }
      @fuzzy.each_value do |keys|
        keys.sort!
        keys.uniq!
      end
    end

    # Looks +word+ up and returns the matching entries as HTML.
    #
    # @param word [String] the word to search for
    # @param level [Integer] how permissive the search is:
    #   0 or more looks up +word+ exactly as given; 1 or more also includes
    #   every key with the same simplified form; 2 or more also includes every
    #   key whose simplified form contains the simplified +word+. A negative
    #   level looks nothing up.
    # @param fragment [Boolean] when true, only the concatenated entries are
    #   returned (possibly an empty string); when false, they are wrapped in a
    #   complete HTML document and a "no results" message is used if nothing
    #   was found
    # @return [String] HTML
    def look_up(word, level: 1, fragment: false)
      simple_word = simplify(word)

      # Candidates are gathered in priority order; uniq keeps the first
      # occurrence so each word is rendered exactly once.
      candidates = []
      candidates << word if level >= 0
      candidates.concat(@fuzzy[simple_word] || []) if level >= 1
      if level >= 2
        @fuzzy.each do |simple, words|
          candidates.concat(words) if simple.include?(simple_word)
        end
      end

      html = candidates.uniq.map { |actual| look_up_and_format(actual) }.join
      return html if fragment

      html = "<i>No results were found for your query.</i>" if html.empty?

      %Q{
        <!DOCTYPE html>
        <html>
        <head>
        <meta charset='utf-8'>
        <title>Epos</title>
        <style type='text/css'>#{self.style}</style>
        </head>
        <body>
        #{html}
        </body>
        </html>
      }
    end

    # @return the stylesheet provided by the first formatter
    def style
      @fmt1.style
    end

    # Looks +word+ up verbatim in both data files and formats every entry
    # found, entries from the first file coming first.
    #
    # @param word [String] exact key to look up
    # @return [String] concatenated HTML, empty when nothing matches
    def look_up_and_format(word)
      [[@idf1, @fmt1], [@idf2, @fmt2]].map do |idf, fmt|
        idf.look_up(word).map { |text| fmt.format(@entry_parser.parse(text)) }.join
      end.join
    end

    # Reduces +s+ to the form used as a key for fuzzy matching: the listed
    # accented letters (either case) become plain lowercase letters, and
    # spaces, hyphens and parentheses are removed. Other characters, including
    # unaccented uppercase letters, are left untouched.
    #
    # @param s [String]
    # @return [String]
    def simplify(s)
      # The hyphen is escaped so that it is a literal character rather than a
      # range operator inside the character class.
      s.tr("áéíóúâêôãõàüçÁÉÍÓÚÂÊÔÃÕÀÜÇ", "aeiouaeoaoaucaeiouaeoaoauc").gsub(/[ \-()]/, "")
    end

  end
end
|
data/lib/epos/encoded-file.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
module Epos
  # Read-only access to an enciphered file. Every byte read is passed through
  # #decipher and the result is transcoded from ISO-8859-1 to UTF-8.
  class EncodedFile

    # @param path [String] file to open (binary mode)
    def initialize(path)
      @io = File.open(path, "rb")
    end

    # Reads and deciphers up to +length+ bytes from the current position, or
    # the remainder of the file when +length+ is nil.
    #
    # @param length [Integer, nil]
    # @return [String, nil] UTF-8 text, or nil when +length+ is given and the
    #   file is already at its end (mirrors IO#read)
    def read(length = nil)
      raw = @io.read(length)
      # IO#read(length) yields nil at end of file; pass that on instead of
      # failing inside #decipher.
      return nil if raw.nil?

      self.decipher(raw).encode("utf-8", "iso-8859-1")
    end

    # Moves the read position to byte offset +pos+ of the underlying file.
    def seek(pos)
      @io.seek(pos)
    end

    # Closes the underlying file.
    def close
      @io.close
    end

    # @return [Boolean] whether the underlying file is at its end
    def eof?
      @io.eof?
    end

    # Opens +path+, reads up to +length+ deciphered bytes (everything when
    # nil) and closes the file again, even if reading raises.
    #
    # @param path [String]
    # @param length [Integer, nil]
    # @return [String, nil] see #read
    def self.read(path, length = nil)
      file = new(path)
      begin
        file.read(length)
      ensure
        file.close
      end
    end

    protected

    # Shifts every byte by 0x0b modulo 255 and turns a resulting 11 into 10
    # (line feed).
    #
    # NOTE(review): the modulus is 255, not 256 -- kept as is; confirm against
    # the actual file format.
    #
    # @param data [String] raw bytes
    # @return [String] deciphered bytes
    def decipher(data)
      plain = data.each_byte.map do |byte|
        shifted = (byte + 0x0b) % 255
        shifted == 11 ? 10 : shifted
      end
      plain.pack("C*")
    end

  end
end
|