poliqarpr-corpus 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. data/README.txt +71 -0
  2. data/Rakefile +23 -0
  3. data/corpus/frek.cdf +4 -0
  4. data/corpus/frek.cfg +100 -0
  5. data/corpus/frek.cfg~ +100 -0
  6. data/corpus/frek.meta.cfg +1 -0
  7. data/corpus/frek.meta.lisp +4 -0
  8. data/corpus/frek.poliqarp.base1.image +0 -0
  9. data/corpus/frek.poliqarp.base1.offset +0 -0
  10. data/corpus/frek.poliqarp.base2.image +0 -0
  11. data/corpus/frek.poliqarp.base2.offset +0 -0
  12. data/corpus/frek.poliqarp.chunk.image +0 -0
  13. data/corpus/frek.poliqarp.corpus.image +0 -0
  14. data/corpus/frek.poliqarp.meta-key.image +0 -0
  15. data/corpus/frek.poliqarp.meta-key.offset +0 -0
  16. data/corpus/frek.poliqarp.meta-value.image +0 -0
  17. data/corpus/frek.poliqarp.meta-value.offset +0 -0
  18. data/corpus/frek.poliqarp.meta.image +0 -0
  19. data/corpus/frek.poliqarp.orth.image +0 -0
  20. data/corpus/frek.poliqarp.orth.index.alpha +0 -0
  21. data/corpus/frek.poliqarp.orth.index.atergo +0 -0
  22. data/corpus/frek.poliqarp.orth.offset +0 -0
  23. data/corpus/frek.poliqarp.rindex.amb +0 -0
  24. data/corpus/frek.poliqarp.rindex.amb.offset +0 -0
  25. data/corpus/frek.poliqarp.rindex.disamb +0 -0
  26. data/corpus/frek.poliqarp.rindex.disamb.offset +0 -0
  27. data/corpus/frek.poliqarp.rindex.orth +0 -0
  28. data/corpus/frek.poliqarp.rindex.orth.offset +0 -0
  29. data/corpus/frek.poliqarp.subchunk.image +0 -0
  30. data/corpus/frek.poliqarp.subchunk.item.ch +0 -0
  31. data/corpus/frek.poliqarp.subchunk.offset +0 -0
  32. data/corpus/frek.poliqarp.subpos1.image +0 -0
  33. data/corpus/frek.poliqarp.subpos1.offset +0 -0
  34. data/corpus/frek.poliqarp.subpos2.image +0 -0
  35. data/corpus/frek.poliqarp.subpos2.offset +0 -0
  36. data/corpus/frek.poliqarp.tag.image +0 -0
  37. data/corpus/frek.poliqarp.tag.offset +0 -0
  38. data/lib/poliqarpr-corpus.rb +3 -0
  39. data/poliqarpr-corpus.gemspec +17 -0
  40. metadata +103 -0
@@ -0,0 +1,71 @@
1
+ = poliqarpr-corpus
2
+
3
+ * http://github.com/apohllo/poliqarpr-corpus
4
+ * http://korpus.pl/index.php?page=download
5
+
6
+ == DESCRIPTION:
7
+
8
+ Default corpus for poliqarpr (Ruby client for Poliqarp server).
9
+
10
+
11
+ == FEATURES/PROBLEMS:
12
+
13
+ * The default corpus allows testing the poliqarpr Ruby client without the need to
14
+ manually download or create the corpus.
15
+
16
+ == SYNOPSIS:
17
+
18
+ Poliqarpr is a Ruby client for the Poliqarp corpus server. The default corpus
19
+ for Poliqarpr is used in examples and testing.
20
+
21
+
22
+ == REQUIREMENTS:
23
+
24
+ * poliqarpr (sudo gem install poliqarpr)
25
+ * Poliqarp server (only C implementation http://poliqarp.sourceforge.net/)
26
+
27
+ == INSTALL:
28
+
29
+ You need RubyGems v. 1.2
30
+
31
+ * gem -v
32
+ * 1.2.0 #=> ok
33
+
34
+ You need the gemcutter.org repository to be added to your sources list:
35
+
36
+ * gem sources -a http://gemcutter.org
37
+
38
+ Then you can type:
39
+
40
+ * sudo gem install poliqarpr-corpus
41
+
42
+ == BASIC USAGE:
43
+
44
+ Require the gem:
45
+
46
+ require 'poliqarpr'
47
+
48
+ Create the server client and open the default corpus
49
+
50
+ client = Poliqarp::Client.new
51
+ client.open_corpus :default
52
+
53
+ Query the corpus for a given segment
54
+
55
+ result = client.find("kot")
56
+ result[0].to_s
57
+
58
+ Remember to close the client on exit
59
+
60
+ client.close
61
+
62
+ == LICENSE:
63
+
64
+ The sample corpus ("frek" from
65
+ http://korpus.pl/index.php?page=download) is distributed under
66
+ the GNU GPL license v 2.0
67
+ http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
68
+
69
+ == FEEDBACK
70
+
71
+ * mailto:apohllo@o2.pl
@@ -0,0 +1,23 @@
1
+ task :default => [:install]
2
+
3
+ $gem_name = "poliqarpr-corpus"
4
+
5
+ desc "Build the gem"
6
+ task :build do
7
+ sh "gem build #$gem_name.gemspec"
8
+ end
9
+
10
+ desc "Install the library at local machnie"
11
+ task :install => :build do
12
+ sh "sudo gem install #$gem_name -l"
13
+ end
14
+
15
+ desc "Uninstall the library from local machnie"
16
+ task :uninstall do
17
+ sh "sudo gem uninstall #$gem_name"
18
+ end
19
+
20
+ desc "Clean"
21
+ task :clean do
22
+ sh "rm #$gem_name*.gem"
23
+ end
@@ -0,0 +1,4 @@
1
+ version = 1
2
+ endianness = little-endian
3
+ indices = oda
4
+ index-granularity = 1024
@@ -0,0 +1,100 @@
1
+ # Config file format for Oasis release
2
+ # Config version 1.0
3
+
4
+ # The new startup section may contain any command normally accepted by the shell
5
+
6
+ [ALIASES]
7
+
8
+ masc = m1|m2|m3
9
+ verb = pact|ppas|winien|praet|bedzie|fin|impt|aglt|ger|imps|inf|pant|pcon
10
+ noun = subst|depr|xxs|ger|ppron12|ppron3
11
+ pron = ppron12|ppron3|siebie
12
+
13
+
14
+ [ATTR]
15
+
16
+ number = sg pl
17
+ case = nom gen dat acc inst loc voc
18
+ gender = m1 m2 m3 f n
19
+ person = pri sec ter
20
+ degree = pos comp sup
21
+ aspect = imperf perf
22
+ negation = aff neg
23
+ accommodability = congr rec
24
+ accentability = akc nakc
25
+ post-prepositionality = npraep praep
26
+ agglutination = agl nagl
27
+ vocalicity = nwok wok
28
+
29
+ # Parts of speech no longer need forward declarations, this was inconvenient and ugly.
30
+ # Also, any attribute may be optional so a declaration such as:
31
+ # foo = [bar] [froz] fred [wilma]
32
+ # should no longer cause problems and ctags with such attributes now parse correctly regardless
33
+ # of presence or absence of any optional attribute
34
+
35
+ [POS]
36
+
37
+ adja =
38
+ adjp =
39
+ conj =
40
+ interp =
41
+ pred =
42
+ xxx =
43
+ adv = degree
44
+ imps = aspect
45
+ inf = aspect
46
+ pant = aspect
47
+ pcon = aspect
48
+ qub = [vocalicity]
49
+ prep = case [vocalicity]
50
+ siebie = case
51
+ subst = number case gender
52
+ depr = number case gender
53
+ xxs = number case gender
54
+ ger = number case gender aspect negation
55
+ ppron12 = number case gender person [accentability]
56
+ ppron3 = number case gender person [accentability] [post-prepositionality]
57
+ num = number case gender [accommodability]
58
+ adj = number case gender degree
59
+ pact = number case gender aspect negation
60
+ ppas = number case gender aspect negation
61
+ winien = number gender aspect
62
+ praet = number gender aspect [agglutination]
63
+ bedzie = number person aspect
64
+ fin = number person aspect
65
+ impt = number person aspect
66
+ aglt = number person aspect vocalicity
67
+ ign =
68
+
69
+ # Named entities replaced old 'special' attributes, name changed mostly because of
70
+ # unification of 'named-thing' handling code into one named-entity thing
71
+ # Entity aliasing allows for any existing entity to be seen under different name
72
+ #
73
+ # FCQP provides four builtin entities:
74
+ # entity-current
75
+ # entity-base
76
+ # entity-tag
77
+ # entity-pos
78
+
79
+ [NAMED-ENTITY]
80
+
81
+ entity-orth = orth
82
+ entity-base = base
83
+ entity-tag = tag
84
+ entity-pos = pos
85
+
86
+ # Old 'aliases' for attribute names
87
+
88
+ pos = flex
89
+ number = numb nmb
90
+ case = cas
91
+ gender = gnd gend
92
+ person = per pers
93
+ degree = deg degr
94
+ aspect = asp
95
+ negation = neg
96
+ accommodability = acco acom acm
97
+ accentability = acce acen acn
98
+ post-prepositionality = ppr ppre
99
+ agglutination = agg aggl
100
+ vocalicity = vcl
@@ -0,0 +1,100 @@
1
+ # Config file format for Oasis release
2
+ # Config version 1.0
3
+
4
+ # The new startup section may contain any command normally accepted by the shell
5
+
6
+ [STARTUP]
7
+
8
+ /alias masc = m1 m2 m3
9
+ /alias verb = pact ppas winien praet bedzie fin impt aglt ger imps inf pant pcon
10
+ /alias noun = subst depr xxs ger ppron12 ppron3
11
+ /alias pron = ppron12 ppron3 siebie
12
+
13
+
14
+ [ATTR]
15
+
16
+ number = sg pl
17
+ case = nom gen dat acc inst loc voc
18
+ gender = m1 m2 m3 f n
19
+ person = pri sec ter
20
+ degree = pos comp sup
21
+ aspect = imperf perf
22
+ negation = aff neg
23
+ accommodability = congr rec
24
+ accentability = akc nakc
25
+ post-prepositionality = npraep praep
26
+ agglutination = agl nagl
27
+ vocalicity = nwok wok
28
+
29
+ # Parts of speech no longer need forward declarations, this was inconvenient and ugly.
30
+ # Also, any attribute may be optional so a declaration such as:
31
+ # foo = [bar] [froz] fred [wilma]
32
+ # should no longer cause problems and ctags with such attributes now parse correctly regardless
33
+ # of presence or absence of any optional attribute
34
+
35
+ [POS]
36
+
37
+ adja =
38
+ adjp =
39
+ conj =
40
+ interp =
41
+ pred =
42
+ xxx =
43
+ adv = degree
44
+ imps = aspect
45
+ inf = aspect
46
+ pant = aspect
47
+ pcon = aspect
48
+ qub = [vocalicity]
49
+ prep = case [vocalicity]
50
+ siebie = case
51
+ subst = number case gender
52
+ depr = number case gender
53
+ xxs = number case gender
54
+ ger = number case gender aspect negation
55
+ ppron12 = number case gender person [accentability]
56
+ ppron3 = number case gender person [accentability] [post-prepositionality]
57
+ num = number case gender [accommodability]
58
+ adj = number case gender degree
59
+ pact = number case gender aspect negation
60
+ ppas = number case gender aspect negation
61
+ winien = number gender aspect
62
+ praet = number gender aspect [agglutination]
63
+ bedzie = number person aspect
64
+ fin = number person aspect
65
+ impt = number person aspect
66
+ aglt = number person aspect vocalicity
67
+ ign =
68
+
69
+ # Named entities replaced old 'special' attributes, name changed mostly because of
70
+ # unification of 'named-thing' handling code into one named-entity thing
71
+ # Entity aliasing allows for any existing entity to be seen under different name
72
+ #
73
+ # FCQP provides four builtin entities:
74
+ # entity-current
75
+ # entity-base
76
+ # entity-tag
77
+ # entity-pos
78
+
79
+ [NAMED-ENTITY]
80
+
81
+ entity-orth = orth
82
+ entity-base = base
83
+ entity-tag = tag
84
+ entity-pos = pos
85
+
86
+ # Old 'aliases' for attribute names
87
+
88
+ pos = flex
89
+ number = numb nmb
90
+ case = cas
91
+ gender = gnd gend
92
+ person = per pers
93
+ degree = deg degr
94
+ aspect = asp
95
+ negation = neg
96
+ accommodability = acco acom acm
97
+ accentability = acce acen acn
98
+ post-prepositionality = ppr ppre
99
+ agglutination = agg aggl
100
+ vocalicity = vcl
@@ -0,0 +1 @@
1
+ S sample
@@ -0,0 +1,4 @@
1
+ (single "sample"
2
+ "/cesHeader/fileDesc/(sourceDesc/biblFull/)*sourceDesc/biblStruct/monogr/h.title")
3
+
4
+
@@ -0,0 +1,3 @@
1
+ module Poliqarp
2
+ DEFAULT_CORPUS = File.join(File.dirname(__FILE__), '..','corpus','frek')
3
+ end
@@ -0,0 +1,17 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = "poliqarpr-corpus"
3
+ s.version = "1.0.1"
4
+ s.date = "2009-12-10"
5
+ s.summary = "Default corpus for poliqarpr"
6
+ s.email = "apohllo@o2.pl"
7
+ s.homepage = "http://www.github.com/apohllo/poliqarpr-corpus"
8
+ s.description = "Default corpus for Ruby client for Poliqarp (NLP corpus server)"
9
+ s.authors = ['Aleksander Pohl']
10
+ s.files = ["Rakefile", "poliqarpr-corpus.gemspec", 'lib/poliqarpr-corpus.rb',
11
+ "README.txt", ] + Dir.glob("corpus/**/*")
12
+ s.rdoc_options = ["--main", "README.txt"]
13
+ s.has_rdoc = true
14
+ s.extra_rdoc_files = ["README.txt"]
15
+ s.add_dependency("poliqarpr", [">= 0.0.3"])
16
+ end
17
+
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: poliqarpr-corpus
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Aleksander Pohl
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-10 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: poliqarpr
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.3
24
+ version:
25
+ description: Default corpus for Ruby client for Poliqarp (NLP corpus server)
26
+ email: apohllo@o2.pl
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - README.txt
33
+ files:
34
+ - Rakefile
35
+ - poliqarpr-corpus.gemspec
36
+ - lib/poliqarpr-corpus.rb
37
+ - README.txt
38
+ - corpus/frek.poliqarp.base2.offset
39
+ - corpus/frek.poliqarp.subchunk.image
40
+ - corpus/frek.poliqarp.meta-value.image
41
+ - corpus/frek.poliqarp.corpus.image
42
+ - corpus/frek.poliqarp.chunk.image
43
+ - corpus/frek.poliqarp.subpos1.image
44
+ - corpus/frek.poliqarp.subchunk.offset
45
+ - corpus/frek.poliqarp.subpos1.offset
46
+ - corpus/frek.poliqarp.rindex.amb.offset
47
+ - corpus/frek.poliqarp.base1.offset
48
+ - corpus/frek.poliqarp.subpos2.image
49
+ - corpus/frek.poliqarp.tag.image
50
+ - corpus/frek.poliqarp.rindex.orth
51
+ - corpus/frek.cfg~
52
+ - corpus/frek.poliqarp.orth.offset
53
+ - corpus/frek.meta.lisp
54
+ - corpus/frek.cdf
55
+ - corpus/frek.cfg
56
+ - corpus/frek.poliqarp.rindex.disamb
57
+ - corpus/frek.poliqarp.orth.index.alpha
58
+ - corpus/frek.poliqarp.subchunk.item.ch
59
+ - corpus/frek.poliqarp.meta-key.image
60
+ - corpus/frek.poliqarp.meta-value.offset
61
+ - corpus/frek.poliqarp.orth.index.atergo
62
+ - corpus/frek.poliqarp.base1.image
63
+ - corpus/frek.poliqarp.orth.image
64
+ - corpus/frek.poliqarp.meta.image
65
+ - corpus/frek.poliqarp.meta-key.offset
66
+ - corpus/frek.poliqarp.rindex.disamb.offset
67
+ - corpus/frek.poliqarp.rindex.orth.offset
68
+ - corpus/frek.poliqarp.subpos2.offset
69
+ - corpus/frek.poliqarp.rindex.amb
70
+ - corpus/frek.meta.cfg
71
+ - corpus/frek.poliqarp.tag.offset
72
+ - corpus/frek.poliqarp.base2.image
73
+ has_rdoc: true
74
+ homepage: http://www.github.com/apohllo/poliqarpr-corpus
75
+ licenses: []
76
+
77
+ post_install_message:
78
+ rdoc_options:
79
+ - --main
80
+ - README.txt
81
+ require_paths:
82
+ - lib
83
+ required_ruby_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ">="
86
+ - !ruby/object:Gem::Version
87
+ version: "0"
88
+ version:
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: "0"
94
+ version:
95
+ requirements: []
96
+
97
+ rubyforge_project:
98
+ rubygems_version: 1.3.5
99
+ signing_key:
100
+ specification_version: 3
101
+ summary: Default corpus for poliqarpr
102
+ test_files: []
103
+