poliqarpr-corpus 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.txt +71 -0
- data/Rakefile +23 -0
- data/corpus/frek.cdf +4 -0
- data/corpus/frek.cfg +100 -0
- data/corpus/frek.cfg~ +100 -0
- data/corpus/frek.meta.cfg +1 -0
- data/corpus/frek.meta.lisp +4 -0
- data/corpus/frek.poliqarp.base1.image +0 -0
- data/corpus/frek.poliqarp.base1.offset +0 -0
- data/corpus/frek.poliqarp.base2.image +0 -0
- data/corpus/frek.poliqarp.base2.offset +0 -0
- data/corpus/frek.poliqarp.chunk.image +0 -0
- data/corpus/frek.poliqarp.corpus.image +0 -0
- data/corpus/frek.poliqarp.meta-key.image +0 -0
- data/corpus/frek.poliqarp.meta-key.offset +0 -0
- data/corpus/frek.poliqarp.meta-value.image +0 -0
- data/corpus/frek.poliqarp.meta-value.offset +0 -0
- data/corpus/frek.poliqarp.meta.image +0 -0
- data/corpus/frek.poliqarp.orth.image +0 -0
- data/corpus/frek.poliqarp.orth.index.alpha +0 -0
- data/corpus/frek.poliqarp.orth.index.atergo +0 -0
- data/corpus/frek.poliqarp.orth.offset +0 -0
- data/corpus/frek.poliqarp.rindex.amb +0 -0
- data/corpus/frek.poliqarp.rindex.amb.offset +0 -0
- data/corpus/frek.poliqarp.rindex.disamb +0 -0
- data/corpus/frek.poliqarp.rindex.disamb.offset +0 -0
- data/corpus/frek.poliqarp.rindex.orth +0 -0
- data/corpus/frek.poliqarp.rindex.orth.offset +0 -0
- data/corpus/frek.poliqarp.subchunk.image +0 -0
- data/corpus/frek.poliqarp.subchunk.item.ch +0 -0
- data/corpus/frek.poliqarp.subchunk.offset +0 -0
- data/corpus/frek.poliqarp.subpos1.image +0 -0
- data/corpus/frek.poliqarp.subpos1.offset +0 -0
- data/corpus/frek.poliqarp.subpos2.image +0 -0
- data/corpus/frek.poliqarp.subpos2.offset +0 -0
- data/corpus/frek.poliqarp.tag.image +0 -0
- data/corpus/frek.poliqarp.tag.offset +0 -0
- data/lib/poliqarpr-corpus.rb +3 -0
- data/poliqarpr-corpus.gemspec +17 -0
- metadata +103 -0
data/README.txt
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
= poliqarpr-corpus
|
2
|
+
|
3
|
+
* http://github.com/apohllo/poliqarpr-corpus
|
4
|
+
* http://korpus.pl/index.php?page=download
|
5
|
+
|
6
|
+
== DESCRIPTION:
|
7
|
+
|
8
|
+
Default corpus for poliqarpr (Ruby client for Poliqarp server).
|
9
|
+
|
10
|
+
|
11
|
+
== FEATURES/PROBLEMS:
|
12
|
+
|
13
|
+
* The default corpus allows testing the poliqarpr Ruby client without the need to
|
14
|
+
manually download or create the corpus.
|
15
|
+
|
16
|
+
== SYNOPSIS:
|
17
|
+
|
18
|
+
Poliqarpr is a Ruby client for the Poliqarp corpus server. The default corpus
|
19
|
+
for Poliqarpr is used in examples and testing.
|
20
|
+
|
21
|
+
|
22
|
+
== REQUIREMENTS:
|
23
|
+
|
24
|
+
* poliqarpr (sudo gem install poliqarpr)
|
25
|
+
* Poliqarp server (only C implementation http://poliqarp.sourceforge.net/)
|
26
|
+
|
27
|
+
== INSTALL:
|
28
|
+
|
29
|
+
You need RubyGems v. 1.2
|
30
|
+
|
31
|
+
* gem -v
|
32
|
+
* 1.2.0 #=> ok
|
33
|
+
|
34
|
+
You need the gemcutter.org repository to be added to your sources list:
|
35
|
+
|
36
|
+
* gem sources -a http://gemcutter.org
|
37
|
+
|
38
|
+
Then you can type:
|
39
|
+
|
40
|
+
* sudo gem install poliqarpr-corpus
|
41
|
+
|
42
|
+
== BASIC USAGE:
|
43
|
+
|
44
|
+
Require the gem:
|
45
|
+
|
46
|
+
require 'poliqarpr'
|
47
|
+
|
48
|
+
Create the server client and open the default corpus
|
49
|
+
|
50
|
+
client = Poliqarp::Client.new
|
51
|
+
client.open_corpus :default
|
52
|
+
|
53
|
+
Query the corpus for a given segment
|
54
|
+
|
55
|
+
result = client.find("kot")
|
56
|
+
result[0].to_s
|
57
|
+
|
58
|
+
Remember to close the client on exit
|
59
|
+
|
60
|
+
client.close
|
61
|
+
|
62
|
+
== LICENSE:
|
63
|
+
|
64
|
+
The sample corpus ("frek" from
|
65
|
+
http://korpus.pl/index.php?page=download) is distributed under
|
66
|
+
the GNU GPL license v 2.0
|
67
|
+
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
|
68
|
+
|
69
|
+
== FEEDBACK
|
70
|
+
|
71
|
+
* mailto:apohllo@o2.pl
|
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
task :default => [:install]
|
2
|
+
|
3
|
+
$gem_name = "poliqarpr-corpus"
|
4
|
+
|
5
|
+
desc "Build the gem"
|
6
|
+
task :build do
|
7
|
+
sh "gem build #$gem_name.gemspec"
|
8
|
+
end
|
9
|
+
|
10
|
+
desc "Install the library at local machnie"
|
11
|
+
task :install => :build do
|
12
|
+
sh "sudo gem install #$gem_name -l"
|
13
|
+
end
|
14
|
+
|
15
|
+
desc "Uninstall the library from local machnie"
|
16
|
+
task :uninstall do
|
17
|
+
sh "sudo gem uninstall #$gem_name"
|
18
|
+
end
|
19
|
+
|
20
|
+
desc "Clean"
|
21
|
+
task :clean do
|
22
|
+
sh "rm #$gem_name*.gem"
|
23
|
+
end
|
data/corpus/frek.cdf
ADDED
data/corpus/frek.cfg
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# Config file format for Oasis release
|
2
|
+
# Config version 1.0
|
3
|
+
|
4
|
+
# The new startup section may contain any command normally accepted by the shell
|
5
|
+
|
6
|
+
[ALIASES]
|
7
|
+
|
8
|
+
masc = m1|m2|m3
|
9
|
+
verb = pact|ppas|winien|praet|bedzie|fin|impt|aglt|ger|imps|inf|pant|pcon
|
10
|
+
noun = subst|depr|xxs|ger|ppron12|ppron3
|
11
|
+
pron = ppron12|ppron3|siebie
|
12
|
+
|
13
|
+
|
14
|
+
[ATTR]
|
15
|
+
|
16
|
+
number = sg pl
|
17
|
+
case = nom gen dat acc inst loc voc
|
18
|
+
gender = m1 m2 m3 f n
|
19
|
+
person = pri sec ter
|
20
|
+
degree = pos comp sup
|
21
|
+
aspect = imperf perf
|
22
|
+
negation = aff neg
|
23
|
+
accommodability = congr rec
|
24
|
+
accentability = akc nakc
|
25
|
+
post-prepositionality = npraep praep
|
26
|
+
agglutination = agl nagl
|
27
|
+
vocalicity = nwok wok
|
28
|
+
|
29
|
+
# Parts of speech no longer need forward declarations, this was inconvenient and ugly.
|
30
|
+
# Also, any attribute may be optional so a declaration such as:
|
31
|
+
# foo = [bar] [froz] fred [wilma]
|
32
|
+
# should no longer cause problems and ctags with such attributes now parse correctly regardless
|
33
|
+
# of presence or absence of any optional attribute
|
34
|
+
|
35
|
+
[POS]
|
36
|
+
|
37
|
+
adja =
|
38
|
+
adjp =
|
39
|
+
conj =
|
40
|
+
interp =
|
41
|
+
pred =
|
42
|
+
xxx =
|
43
|
+
adv = degree
|
44
|
+
imps = aspect
|
45
|
+
inf = aspect
|
46
|
+
pant = aspect
|
47
|
+
pcon = aspect
|
48
|
+
qub = [vocalicity]
|
49
|
+
prep = case [vocalicity]
|
50
|
+
siebie = case
|
51
|
+
subst = number case gender
|
52
|
+
depr = number case gender
|
53
|
+
xxs = number case gender
|
54
|
+
ger = number case gender aspect negation
|
55
|
+
ppron12 = number case gender person [accentability]
|
56
|
+
ppron3 = number case gender person [accentability] [post-prepositionality]
|
57
|
+
num = number case gender [accommodability]
|
58
|
+
adj = number case gender degree
|
59
|
+
pact = number case gender aspect negation
|
60
|
+
ppas = number case gender aspect negation
|
61
|
+
winien = number gender aspect
|
62
|
+
praet = number gender aspect [agglutination]
|
63
|
+
bedzie = number person aspect
|
64
|
+
fin = number person aspect
|
65
|
+
impt = number person aspect
|
66
|
+
aglt = number person aspect vocalicity
|
67
|
+
ign =
|
68
|
+
|
69
|
+
# Named entities replaced old 'special' attributes, name changed mostly because of
|
70
|
+
# unification of 'named-thing' handling code into one named-entity thing
|
71
|
+
# Entity aliasing allows for any existing entity to be seen under different name
|
72
|
+
#
|
73
|
+
# FCQP provides four builtin entities:
|
74
|
+
# entity-current
|
75
|
+
# entity-base
|
76
|
+
# entity-tag
|
77
|
+
# entity-pos
|
78
|
+
|
79
|
+
[NAMED-ENTITY]
|
80
|
+
|
81
|
+
entity-orth = orth
|
82
|
+
entity-base = base
|
83
|
+
entity-tag = tag
|
84
|
+
entity-pos = pos
|
85
|
+
|
86
|
+
# Old 'aliases' for attribute names
|
87
|
+
|
88
|
+
pos = flex
|
89
|
+
number = numb nmb
|
90
|
+
case = cas
|
91
|
+
gender = gnd gend
|
92
|
+
person = per pers
|
93
|
+
degree = deg degr
|
94
|
+
aspect = asp
|
95
|
+
negation = neg
|
96
|
+
accommodability = acco acom acm
|
97
|
+
accentability = acce acen acn
|
98
|
+
post-prepositionality = ppr ppre
|
99
|
+
agglutination = agg aggl
|
100
|
+
vocalicity = vcl
|
data/corpus/frek.cfg~
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# Config file format for Oasis release
|
2
|
+
# Config version 1.0
|
3
|
+
|
4
|
+
# The new startup section may contain any command normally accepted by the shell
|
5
|
+
|
6
|
+
[STARTUP]
|
7
|
+
|
8
|
+
/alias masc = m1 m2 m3
|
9
|
+
/alias verb = pact ppas winien praet bedzie fin impt aglt ger imps inf pant pcon
|
10
|
+
/alias noun = subst depr xxs ger ppron12 ppron3
|
11
|
+
/alias pron = ppron12 ppron3 siebie
|
12
|
+
|
13
|
+
|
14
|
+
[ATTR]
|
15
|
+
|
16
|
+
number = sg pl
|
17
|
+
case = nom gen dat acc inst loc voc
|
18
|
+
gender = m1 m2 m3 f n
|
19
|
+
person = pri sec ter
|
20
|
+
degree = pos comp sup
|
21
|
+
aspect = imperf perf
|
22
|
+
negation = aff neg
|
23
|
+
accommodability = congr rec
|
24
|
+
accentability = akc nakc
|
25
|
+
post-prepositionality = npraep praep
|
26
|
+
agglutination = agl nagl
|
27
|
+
vocalicity = nwok wok
|
28
|
+
|
29
|
+
# Parts of speech no longer need forward declarations, this was inconvenient and ugly.
|
30
|
+
# Also, any attribute may be optional so a declaration such as:
|
31
|
+
# foo = [bar] [froz] fred [wilma]
|
32
|
+
# should no longer cause problems and ctags with such attributes now parse correctly regardless
|
33
|
+
# of presence or absence of any optional attribute
|
34
|
+
|
35
|
+
[POS]
|
36
|
+
|
37
|
+
adja =
|
38
|
+
adjp =
|
39
|
+
conj =
|
40
|
+
interp =
|
41
|
+
pred =
|
42
|
+
xxx =
|
43
|
+
adv = degree
|
44
|
+
imps = aspect
|
45
|
+
inf = aspect
|
46
|
+
pant = aspect
|
47
|
+
pcon = aspect
|
48
|
+
qub = [vocalicity]
|
49
|
+
prep = case [vocalicity]
|
50
|
+
siebie = case
|
51
|
+
subst = number case gender
|
52
|
+
depr = number case gender
|
53
|
+
xxs = number case gender
|
54
|
+
ger = number case gender aspect negation
|
55
|
+
ppron12 = number case gender person [accentability]
|
56
|
+
ppron3 = number case gender person [accentability] [post-prepositionality]
|
57
|
+
num = number case gender [accommodability]
|
58
|
+
adj = number case gender degree
|
59
|
+
pact = number case gender aspect negation
|
60
|
+
ppas = number case gender aspect negation
|
61
|
+
winien = number gender aspect
|
62
|
+
praet = number gender aspect [agglutination]
|
63
|
+
bedzie = number person aspect
|
64
|
+
fin = number person aspect
|
65
|
+
impt = number person aspect
|
66
|
+
aglt = number person aspect vocalicity
|
67
|
+
ign =
|
68
|
+
|
69
|
+
# Named entities replaced old 'special' attributes, name changed mostly because of
|
70
|
+
# unification of 'named-thing' handling code into one named-entity thing
|
71
|
+
# Entity aliasing allows for any existing entity to be seen under different name
|
72
|
+
#
|
73
|
+
# FCQP provides four builtin entities:
|
74
|
+
# entity-current
|
75
|
+
# entity-base
|
76
|
+
# entity-tag
|
77
|
+
# entity-pos
|
78
|
+
|
79
|
+
[NAMED-ENTITY]
|
80
|
+
|
81
|
+
entity-orth = orth
|
82
|
+
entity-base = base
|
83
|
+
entity-tag = tag
|
84
|
+
entity-pos = pos
|
85
|
+
|
86
|
+
# Old 'aliases' for attribute names
|
87
|
+
|
88
|
+
pos = flex
|
89
|
+
number = numb nmb
|
90
|
+
case = cas
|
91
|
+
gender = gnd gend
|
92
|
+
person = per pers
|
93
|
+
degree = deg degr
|
94
|
+
aspect = asp
|
95
|
+
negation = neg
|
96
|
+
accommodability = acco acom acm
|
97
|
+
accentability = acce acen acn
|
98
|
+
post-prepositionality = ppr ppre
|
99
|
+
agglutination = agg aggl
|
100
|
+
vocalicity = vcl
|
@@ -0,0 +1 @@
|
|
1
|
+
S sample
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,17 @@
|
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.name = "poliqarpr-corpus"
|
3
|
+
s.version = "1.0.1"
|
4
|
+
s.date = "2009-12-10"
|
5
|
+
s.summary = "Default corpus for poliqarpr"
|
6
|
+
s.email = "apohllo@o2.pl"
|
7
|
+
s.homepage = "http://www.github.com/apohllo/poliqarpr-corpus"
|
8
|
+
s.description = "Default corpus for Ruby client for Poliqarp (NLP corpus server)"
|
9
|
+
s.authors = ['Aleksander Pohl']
|
10
|
+
s.files = ["Rakefile", "poliqarpr-corpus.gemspec", 'lib/poliqarpr-corpus.rb',
|
11
|
+
"README.txt", ] + Dir.glob("corpus/**/*")
|
12
|
+
s.rdoc_options = ["--main", "README.txt"]
|
13
|
+
s.has_rdoc = true
|
14
|
+
s.extra_rdoc_files = ["README.txt"]
|
15
|
+
s.add_dependency("poliqarpr", [">= 0.0.3"])
|
16
|
+
end
|
17
|
+
|
metadata
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: poliqarpr-corpus
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Aleksander Pohl
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-12-10 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: poliqarpr
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.3
|
24
|
+
version:
|
25
|
+
description: Default corpus for Ruby client for Poliqarp (NLP corpus server)
|
26
|
+
email: apohllo@o2.pl
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- README.txt
|
33
|
+
files:
|
34
|
+
- Rakefile
|
35
|
+
- poliqarpr-corpus.gemspec
|
36
|
+
- lib/poliqarpr-corpus.rb
|
37
|
+
- README.txt
|
38
|
+
- corpus/frek.poliqarp.base2.offset
|
39
|
+
- corpus/frek.poliqarp.subchunk.image
|
40
|
+
- corpus/frek.poliqarp.meta-value.image
|
41
|
+
- corpus/frek.poliqarp.corpus.image
|
42
|
+
- corpus/frek.poliqarp.chunk.image
|
43
|
+
- corpus/frek.poliqarp.subpos1.image
|
44
|
+
- corpus/frek.poliqarp.subchunk.offset
|
45
|
+
- corpus/frek.poliqarp.subpos1.offset
|
46
|
+
- corpus/frek.poliqarp.rindex.amb.offset
|
47
|
+
- corpus/frek.poliqarp.base1.offset
|
48
|
+
- corpus/frek.poliqarp.subpos2.image
|
49
|
+
- corpus/frek.poliqarp.tag.image
|
50
|
+
- corpus/frek.poliqarp.rindex.orth
|
51
|
+
- corpus/frek.cfg~
|
52
|
+
- corpus/frek.poliqarp.orth.offset
|
53
|
+
- corpus/frek.meta.lisp
|
54
|
+
- corpus/frek.cdf
|
55
|
+
- corpus/frek.cfg
|
56
|
+
- corpus/frek.poliqarp.rindex.disamb
|
57
|
+
- corpus/frek.poliqarp.orth.index.alpha
|
58
|
+
- corpus/frek.poliqarp.subchunk.item.ch
|
59
|
+
- corpus/frek.poliqarp.meta-key.image
|
60
|
+
- corpus/frek.poliqarp.meta-value.offset
|
61
|
+
- corpus/frek.poliqarp.orth.index.atergo
|
62
|
+
- corpus/frek.poliqarp.base1.image
|
63
|
+
- corpus/frek.poliqarp.orth.image
|
64
|
+
- corpus/frek.poliqarp.meta.image
|
65
|
+
- corpus/frek.poliqarp.meta-key.offset
|
66
|
+
- corpus/frek.poliqarp.rindex.disamb.offset
|
67
|
+
- corpus/frek.poliqarp.rindex.orth.offset
|
68
|
+
- corpus/frek.poliqarp.subpos2.offset
|
69
|
+
- corpus/frek.poliqarp.rindex.amb
|
70
|
+
- corpus/frek.meta.cfg
|
71
|
+
- corpus/frek.poliqarp.tag.offset
|
72
|
+
- corpus/frek.poliqarp.base2.image
|
73
|
+
has_rdoc: true
|
74
|
+
homepage: http://www.github.com/apohllo/poliqarpr-corpus
|
75
|
+
licenses: []
|
76
|
+
|
77
|
+
post_install_message:
|
78
|
+
rdoc_options:
|
79
|
+
- --main
|
80
|
+
- README.txt
|
81
|
+
require_paths:
|
82
|
+
- lib
|
83
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: "0"
|
88
|
+
version:
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: "0"
|
94
|
+
version:
|
95
|
+
requirements: []
|
96
|
+
|
97
|
+
rubyforge_project:
|
98
|
+
rubygems_version: 1.3.5
|
99
|
+
signing_key:
|
100
|
+
specification_version: 3
|
101
|
+
summary: Default corpus for poliqarpr
|
102
|
+
test_files: []
|
103
|
+
|