poliqarpr-corpus 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.txt +71 -0
- data/Rakefile +23 -0
- data/corpus/frek.cdf +4 -0
- data/corpus/frek.cfg +100 -0
- data/corpus/frek.cfg~ +100 -0
- data/corpus/frek.meta.cfg +1 -0
- data/corpus/frek.meta.lisp +4 -0
- data/corpus/frek.poliqarp.base1.image +0 -0
- data/corpus/frek.poliqarp.base1.offset +0 -0
- data/corpus/frek.poliqarp.base2.image +0 -0
- data/corpus/frek.poliqarp.base2.offset +0 -0
- data/corpus/frek.poliqarp.chunk.image +0 -0
- data/corpus/frek.poliqarp.corpus.image +0 -0
- data/corpus/frek.poliqarp.meta-key.image +0 -0
- data/corpus/frek.poliqarp.meta-key.offset +0 -0
- data/corpus/frek.poliqarp.meta-value.image +0 -0
- data/corpus/frek.poliqarp.meta-value.offset +0 -0
- data/corpus/frek.poliqarp.meta.image +0 -0
- data/corpus/frek.poliqarp.orth.image +0 -0
- data/corpus/frek.poliqarp.orth.index.alpha +0 -0
- data/corpus/frek.poliqarp.orth.index.atergo +0 -0
- data/corpus/frek.poliqarp.orth.offset +0 -0
- data/corpus/frek.poliqarp.rindex.amb +0 -0
- data/corpus/frek.poliqarp.rindex.amb.offset +0 -0
- data/corpus/frek.poliqarp.rindex.disamb +0 -0
- data/corpus/frek.poliqarp.rindex.disamb.offset +0 -0
- data/corpus/frek.poliqarp.rindex.orth +0 -0
- data/corpus/frek.poliqarp.rindex.orth.offset +0 -0
- data/corpus/frek.poliqarp.subchunk.image +0 -0
- data/corpus/frek.poliqarp.subchunk.item.ch +0 -0
- data/corpus/frek.poliqarp.subchunk.offset +0 -0
- data/corpus/frek.poliqarp.subpos1.image +0 -0
- data/corpus/frek.poliqarp.subpos1.offset +0 -0
- data/corpus/frek.poliqarp.subpos2.image +0 -0
- data/corpus/frek.poliqarp.subpos2.offset +0 -0
- data/corpus/frek.poliqarp.tag.image +0 -0
- data/corpus/frek.poliqarp.tag.offset +0 -0
- data/lib/poliqarpr-corpus.rb +3 -0
- data/poliqarpr-corpus.gemspec +17 -0
- metadata +103 -0
data/README.txt
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
= poliqarpr-corpus
|
2
|
+
|
3
|
+
* http://github.com/apohllo/poliqarpr-corpus
|
4
|
+
* http://korpus.pl/index.php?page=download
|
5
|
+
|
6
|
+
== DESCRIPTION:
|
7
|
+
|
8
|
+
Default corpus for poliqarpr (Ruby client for Poliqarp server).
|
9
|
+
|
10
|
+
|
11
|
+
== FEATURES/PROBLEMS:
|
12
|
+
|
13
|
+
* The default corpus lets you test the poliqarpr Ruby client without needing to
|
14
|
+
manually download or create the corpus.
|
15
|
+
|
16
|
+
== SYNOPSIS:
|
17
|
+
|
18
|
+
Poliqarpr is a Ruby client for the Poliqarp corpus server. The default corpus
|
19
|
+
for Poliqarpr is used in examples and testing.
|
20
|
+
|
21
|
+
|
22
|
+
== REQUIREMENTS:
|
23
|
+
|
24
|
+
* poliqarpr (sudo gem install poliqarpr)
|
25
|
+
* Poliqarp server (only the C implementation: http://poliqarp.sourceforge.net/)
|
26
|
+
|
27
|
+
== INSTALL:
|
28
|
+
|
29
|
+
You need RubyGems v. 1.2
|
30
|
+
|
31
|
+
* gem -v
|
32
|
+
* 1.2.0 #=> ok
|
33
|
+
|
34
|
+
You need the gemcutter.org repository to be added to your sources list:
|
35
|
+
|
36
|
+
* gem sources -a http://gemcutter.org
|
37
|
+
|
38
|
+
Then you can type:
|
39
|
+
|
40
|
+
* sudo gem install poliqarpr-corpus
|
41
|
+
|
42
|
+
== BASIC USAGE:
|
43
|
+
|
44
|
+
Require the gem:
|
45
|
+
|
46
|
+
require 'poliqarpr'
|
47
|
+
|
48
|
+
Create the server client and open the default corpus
|
49
|
+
|
50
|
+
client = Poliqarp::Client.new
|
51
|
+
client.open_corpus :default
|
52
|
+
|
53
|
+
Query the corpus for a given segment
|
54
|
+
|
55
|
+
result = client.find("kot")
|
56
|
+
result[0].to_s
|
57
|
+
|
58
|
+
Remember to close the client on exit
|
59
|
+
|
60
|
+
client.close
|
61
|
+
|
62
|
+
== LICENSE:
|
63
|
+
|
64
|
+
The sample corpus ("frek" from
|
65
|
+
http://korpus.pl/index.php?page=download) is distributed under
|
66
|
+
the GNU GPL license v 2.0
|
67
|
+
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
|
68
|
+
|
69
|
+
== FEEDBACK
|
70
|
+
|
71
|
+
* mailto:apohllo@o2.pl
|
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Rake tasks for building, installing, uninstalling, and cleaning up the
# poliqarpr-corpus gem.

# Running bare `rake` builds the gem and then installs it locally.
task :default => [:install]

# Gem name shared by every task below; it is also the basename of the gemspec.
$gem_name = "poliqarpr-corpus"

desc "Build the gem"
task :build do
  sh "gem build #{$gem_name}.gemspec"
end

desc "Install the library on the local machine"
task :install => :build do
  # -l restricts `gem install` to local .gem files, i.e. the one just built.
  sh "sudo gem install #{$gem_name} -l"
end

desc "Uninstall the library from the local machine"
task :uninstall do
  sh "sudo gem uninstall #{$gem_name}"
end

desc "Clean"
task :clean do
  # rm_f succeeds even when no built gem is present, so `rake clean` can be
  # run repeatedly; a plain `rm` with an unmatched glob would abort the task.
  rm_f FileList["#{$gem_name}*.gem"]
end
|
data/corpus/frek.cdf
ADDED
data/corpus/frek.cfg
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# Config file format for Oasis release
|
2
|
+
# Config version 1.0
|
3
|
+
|
4
|
+
# The new startup section may contain any command normally accepted by the shell
|
5
|
+
|
6
|
+
[ALIASES]
|
7
|
+
|
8
|
+
masc = m1|m2|m3
|
9
|
+
verb = pact|ppas|winien|praet|bedzie|fin|impt|aglt|ger|imps|inf|pant|pcon
|
10
|
+
noun = subst|depr|xxs|ger|ppron12|ppron3
|
11
|
+
pron = ppron12|ppron3|siebie
|
12
|
+
|
13
|
+
|
14
|
+
[ATTR]
|
15
|
+
|
16
|
+
number = sg pl
|
17
|
+
case = nom gen dat acc inst loc voc
|
18
|
+
gender = m1 m2 m3 f n
|
19
|
+
person = pri sec ter
|
20
|
+
degree = pos comp sup
|
21
|
+
aspect = imperf perf
|
22
|
+
negation = aff neg
|
23
|
+
accommodability = congr rec
|
24
|
+
accentability = akc nakc
|
25
|
+
post-prepositionality = npraep praep
|
26
|
+
agglutination = agl nagl
|
27
|
+
vocalicity = nwok wok
|
28
|
+
|
29
|
+
# Parts of speech no longer need forward declarations, this was inconvenient and ugly.
|
30
|
+
# Also, any attribute may be optional so a declaration such as:
|
31
|
+
# foo = [bar] [froz] fred [wilma]
|
32
|
+
# should no longer cause problems and ctags with such attributes now parse correctly regardless
|
33
|
+
# of presence or absence of any optional attribute
|
34
|
+
|
35
|
+
[POS]
|
36
|
+
|
37
|
+
adja =
|
38
|
+
adjp =
|
39
|
+
conj =
|
40
|
+
interp =
|
41
|
+
pred =
|
42
|
+
xxx =
|
43
|
+
adv = degree
|
44
|
+
imps = aspect
|
45
|
+
inf = aspect
|
46
|
+
pant = aspect
|
47
|
+
pcon = aspect
|
48
|
+
qub = [vocalicity]
|
49
|
+
prep = case [vocalicity]
|
50
|
+
siebie = case
|
51
|
+
subst = number case gender
|
52
|
+
depr = number case gender
|
53
|
+
xxs = number case gender
|
54
|
+
ger = number case gender aspect negation
|
55
|
+
ppron12 = number case gender person [accentability]
|
56
|
+
ppron3 = number case gender person [accentability] [post-prepositionality]
|
57
|
+
num = number case gender [accommodability]
|
58
|
+
adj = number case gender degree
|
59
|
+
pact = number case gender aspect negation
|
60
|
+
ppas = number case gender aspect negation
|
61
|
+
winien = number gender aspect
|
62
|
+
praet = number gender aspect [agglutination]
|
63
|
+
bedzie = number person aspect
|
64
|
+
fin = number person aspect
|
65
|
+
impt = number person aspect
|
66
|
+
aglt = number person aspect vocalicity
|
67
|
+
ign =
|
68
|
+
|
69
|
+
# Named entities replaced old 'special' attributes, name changed mostly because of
|
70
|
+
# unification of 'named-thing' handling code into one named-entity thing
|
71
|
+
# Entity aliasing allows for any existing entity to be seen under different name
|
72
|
+
#
|
73
|
+
# FCQP provides four builtin entities:
|
74
|
+
# entity-current
|
75
|
+
# entity-base
|
76
|
+
# entity-tag
|
77
|
+
# entity-pos
|
78
|
+
|
79
|
+
[NAMED-ENTITY]
|
80
|
+
|
81
|
+
entity-orth = orth
|
82
|
+
entity-base = base
|
83
|
+
entity-tag = tag
|
84
|
+
entity-pos = pos
|
85
|
+
|
86
|
+
# Old 'aliases' for attribute names
|
87
|
+
|
88
|
+
pos = flex
|
89
|
+
number = numb nmb
|
90
|
+
case = cas
|
91
|
+
gender = gnd gend
|
92
|
+
person = per pers
|
93
|
+
degree = deg degr
|
94
|
+
aspect = asp
|
95
|
+
negation = neg
|
96
|
+
accommodability = acco acom acm
|
97
|
+
accentability = acce acen acn
|
98
|
+
post-prepositionality = ppr ppre
|
99
|
+
agglutination = agg aggl
|
100
|
+
vocalicity = vcl
|
data/corpus/frek.cfg~
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# Config file format for Oasis release
|
2
|
+
# Config version 1.0
|
3
|
+
|
4
|
+
# The new startup section may contain any command normally accepted by the shell
|
5
|
+
|
6
|
+
[STARTUP]
|
7
|
+
|
8
|
+
/alias masc = m1 m2 m3
|
9
|
+
/alias verb = pact ppas winien praet bedzie fin impt aglt ger imps inf pant pcon
|
10
|
+
/alias noun = subst depr xxs ger ppron12 ppron3
|
11
|
+
/alias pron = ppron12 ppron3 siebie
|
12
|
+
|
13
|
+
|
14
|
+
[ATTR]
|
15
|
+
|
16
|
+
number = sg pl
|
17
|
+
case = nom gen dat acc inst loc voc
|
18
|
+
gender = m1 m2 m3 f n
|
19
|
+
person = pri sec ter
|
20
|
+
degree = pos comp sup
|
21
|
+
aspect = imperf perf
|
22
|
+
negation = aff neg
|
23
|
+
accommodability = congr rec
|
24
|
+
accentability = akc nakc
|
25
|
+
post-prepositionality = npraep praep
|
26
|
+
agglutination = agl nagl
|
27
|
+
vocalicity = nwok wok
|
28
|
+
|
29
|
+
# Parts of speech no longer need forward declarations, this was inconvenient and ugly.
|
30
|
+
# Also, any attribute may be optional so a declaration such as:
|
31
|
+
# foo = [bar] [froz] fred [wilma]
|
32
|
+
# should no longer cause problems and ctags with such attributes now parse correctly regardless
|
33
|
+
# of presence or absence of any optional attribute
|
34
|
+
|
35
|
+
[POS]
|
36
|
+
|
37
|
+
adja =
|
38
|
+
adjp =
|
39
|
+
conj =
|
40
|
+
interp =
|
41
|
+
pred =
|
42
|
+
xxx =
|
43
|
+
adv = degree
|
44
|
+
imps = aspect
|
45
|
+
inf = aspect
|
46
|
+
pant = aspect
|
47
|
+
pcon = aspect
|
48
|
+
qub = [vocalicity]
|
49
|
+
prep = case [vocalicity]
|
50
|
+
siebie = case
|
51
|
+
subst = number case gender
|
52
|
+
depr = number case gender
|
53
|
+
xxs = number case gender
|
54
|
+
ger = number case gender aspect negation
|
55
|
+
ppron12 = number case gender person [accentability]
|
56
|
+
ppron3 = number case gender person [accentability] [post-prepositionality]
|
57
|
+
num = number case gender [accommodability]
|
58
|
+
adj = number case gender degree
|
59
|
+
pact = number case gender aspect negation
|
60
|
+
ppas = number case gender aspect negation
|
61
|
+
winien = number gender aspect
|
62
|
+
praet = number gender aspect [agglutination]
|
63
|
+
bedzie = number person aspect
|
64
|
+
fin = number person aspect
|
65
|
+
impt = number person aspect
|
66
|
+
aglt = number person aspect vocalicity
|
67
|
+
ign =
|
68
|
+
|
69
|
+
# Named entities replaced old 'special' attributes, name changed mostly because of
|
70
|
+
# unification of 'named-thing' handling code into one named-entity thing
|
71
|
+
# Entity aliasing allows for any existing entity to be seen under different name
|
72
|
+
#
|
73
|
+
# FCQP provides four builtin entities:
|
74
|
+
# entity-current
|
75
|
+
# entity-base
|
76
|
+
# entity-tag
|
77
|
+
# entity-pos
|
78
|
+
|
79
|
+
[NAMED-ENTITY]
|
80
|
+
|
81
|
+
entity-orth = orth
|
82
|
+
entity-base = base
|
83
|
+
entity-tag = tag
|
84
|
+
entity-pos = pos
|
85
|
+
|
86
|
+
# Old 'aliases' for attribute names
|
87
|
+
|
88
|
+
pos = flex
|
89
|
+
number = numb nmb
|
90
|
+
case = cas
|
91
|
+
gender = gnd gend
|
92
|
+
person = per pers
|
93
|
+
degree = deg degr
|
94
|
+
aspect = asp
|
95
|
+
negation = neg
|
96
|
+
accommodability = acco acom acm
|
97
|
+
accentability = acce acen acn
|
98
|
+
post-prepositionality = ppr ppre
|
99
|
+
agglutination = agg aggl
|
100
|
+
vocalicity = vcl
|
@@ -0,0 +1 @@
|
|
1
|
+
S sample
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Gem specification for poliqarpr-corpus, the default corpus used by the
# poliqarpr client in its examples and tests.
Gem::Specification.new do |s|
  s.name = "poliqarpr-corpus"
  s.version = "1.0.1"
  s.date = "2009-12-10"
  s.summary = "Default corpus for poliqarpr"
  s.email = "apohllo@o2.pl"
  s.homepage = "http://www.github.com/apohllo/poliqarpr-corpus"
  s.description = "Default corpus for Ruby client for Poliqarp (NLP corpus server)"
  s.authors = ['Aleksander Pohl']
  # Package everything under corpus/ except editor backup files (names ending
  # in "~", such as frek.cfg~), which should not be shipped with the gem.
  corpus_files = Dir.glob("corpus/**/*").reject { |path| path.end_with?("~") }
  s.files = ["Rakefile", "poliqarpr-corpus.gemspec", 'lib/poliqarpr-corpus.rb',
    "README.txt"] + corpus_files
  s.rdoc_options = ["--main", "README.txt"]
  # has_rdoc is intentionally not set: current RubyGems treats it as
  # deprecated and ignores the value.
  s.extra_rdoc_files = ["README.txt"]
  s.add_dependency("poliqarpr", [">= 0.0.3"])
end
|
17
|
+
|
metadata
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: poliqarpr-corpus
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Aleksander Pohl
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-12-10 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: poliqarpr
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.3
|
24
|
+
version:
|
25
|
+
description: Default corpus for Ruby client for Poliqarp (NLP corpus server)
|
26
|
+
email: apohllo@o2.pl
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- README.txt
|
33
|
+
files:
|
34
|
+
- Rakefile
|
35
|
+
- poliqarpr-corpus.gemspec
|
36
|
+
- lib/poliqarpr-corpus.rb
|
37
|
+
- README.txt
|
38
|
+
- corpus/frek.poliqarp.base2.offset
|
39
|
+
- corpus/frek.poliqarp.subchunk.image
|
40
|
+
- corpus/frek.poliqarp.meta-value.image
|
41
|
+
- corpus/frek.poliqarp.corpus.image
|
42
|
+
- corpus/frek.poliqarp.chunk.image
|
43
|
+
- corpus/frek.poliqarp.subpos1.image
|
44
|
+
- corpus/frek.poliqarp.subchunk.offset
|
45
|
+
- corpus/frek.poliqarp.subpos1.offset
|
46
|
+
- corpus/frek.poliqarp.rindex.amb.offset
|
47
|
+
- corpus/frek.poliqarp.base1.offset
|
48
|
+
- corpus/frek.poliqarp.subpos2.image
|
49
|
+
- corpus/frek.poliqarp.tag.image
|
50
|
+
- corpus/frek.poliqarp.rindex.orth
|
51
|
+
- corpus/frek.cfg~
|
52
|
+
- corpus/frek.poliqarp.orth.offset
|
53
|
+
- corpus/frek.meta.lisp
|
54
|
+
- corpus/frek.cdf
|
55
|
+
- corpus/frek.cfg
|
56
|
+
- corpus/frek.poliqarp.rindex.disamb
|
57
|
+
- corpus/frek.poliqarp.orth.index.alpha
|
58
|
+
- corpus/frek.poliqarp.subchunk.item.ch
|
59
|
+
- corpus/frek.poliqarp.meta-key.image
|
60
|
+
- corpus/frek.poliqarp.meta-value.offset
|
61
|
+
- corpus/frek.poliqarp.orth.index.atergo
|
62
|
+
- corpus/frek.poliqarp.base1.image
|
63
|
+
- corpus/frek.poliqarp.orth.image
|
64
|
+
- corpus/frek.poliqarp.meta.image
|
65
|
+
- corpus/frek.poliqarp.meta-key.offset
|
66
|
+
- corpus/frek.poliqarp.rindex.disamb.offset
|
67
|
+
- corpus/frek.poliqarp.rindex.orth.offset
|
68
|
+
- corpus/frek.poliqarp.subpos2.offset
|
69
|
+
- corpus/frek.poliqarp.rindex.amb
|
70
|
+
- corpus/frek.meta.cfg
|
71
|
+
- corpus/frek.poliqarp.tag.offset
|
72
|
+
- corpus/frek.poliqarp.base2.image
|
73
|
+
has_rdoc: true
|
74
|
+
homepage: http://www.github.com/apohllo/poliqarpr-corpus
|
75
|
+
licenses: []
|
76
|
+
|
77
|
+
post_install_message:
|
78
|
+
rdoc_options:
|
79
|
+
- --main
|
80
|
+
- README.txt
|
81
|
+
require_paths:
|
82
|
+
- lib
|
83
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: "0"
|
88
|
+
version:
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: "0"
|
94
|
+
version:
|
95
|
+
requirements: []
|
96
|
+
|
97
|
+
rubyforge_project:
|
98
|
+
rubygems_version: 1.3.5
|
99
|
+
signing_key:
|
100
|
+
specification_version: 3
|
101
|
+
summary: Default corpus for poliqarpr
|
102
|
+
test_files: []
|
103
|
+
|