poliqarpr 0.0.8 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/changelog.txt +11 -6
- data/lib/poliqarpr.rb +1 -1
- data/lib/poliqarpr/client.rb +137 -158
- data/lib/poliqarpr/config.rb +138 -0
- data/lib/poliqarpr/connector.rb +7 -8
- data/lib/poliqarpr/exceptions.rb +1 -1
- data/lib/poliqarpr/excerpt.rb +7 -7
- data/lib/poliqarpr/query_result.rb +6 -6
- data/lib/poliqarpr/segment.rb +7 -7
- data/lib/poliqarpr/util.rb +1 -1
- data/poliqarpr.gemspec +3 -3
- data/spec/client.rb +29 -28
- data/spec/excerpt.rb +9 -9
- data/spec/query_result.rb +7 -7
- metadata +4 -3
    
        data/changelog.txt
    CHANGED
    
    | @@ -1,3 +1,8 @@ | |
| 1 | 
            +
            0.1.0
         | 
| 2 | 
            +
            - synchronization on async call without handler done via internal mutex
         | 
| 3 | 
            +
              instead of call to STATUS
         | 
| 4 | 
            +
            - client config moved to external class
         | 
| 5 | 
            +
             | 
| 1 6 | 
             
            0.0.8
         | 
| 2 7 | 
             
            - Speed optimization: socket puts changed to write
         | 
| 3 8 |  | 
| @@ -8,7 +13,7 @@ | |
| 8 13 |  | 
| 9 14 | 
             
            0.0.6
         | 
| 10 15 | 
             
            - fix: Excerpt#word - the words constituting the matched query
         | 
| 11 | 
            -
            - new: Excerpt#matched, Excerpt#right_context Excerpt#left_context | 
| 16 | 
            +
            - new: Excerpt#matched, Excerpt#right_context Excerpt#left_context
         | 
| 12 17 | 
             
              return the matched, right context, left context segments respectively
         | 
| 13 18 |  | 
| 14 19 | 
             
            0.0.5
         | 
| @@ -19,16 +24,16 @@ | |
| 19 24 | 
             
            - Documentation now points to gemcutter instead of github
         | 
| 20 25 |  | 
| 21 26 | 
             
            0.0.4
         | 
| 22 | 
            -
            - ping/pong diagnostics | 
| 27 | 
            +
            - ping/pong diagnostics
         | 
| 23 28 | 
             
            - server version
         | 
| 24 | 
            -
            - corpus statistics | 
| 29 | 
            +
            - corpus statistics
         | 
| 25 30 | 
             
            - implementation of asynchronous protocol (not stable)
         | 
| 26 31 |  | 
| 27 32 |  | 
| 28 33 | 
             
            0.0.3
         | 
| 29 34 | 
             
            - the license of the corpus included
         | 
| 30 | 
            -
            - client rdoc documentation | 
| 31 | 
            -
            - support for lemmata retrieval | 
| 35 | 
            +
            - client rdoc documentation
         | 
| 36 | 
            +
            - support for lemmata retrieval
         | 
| 32 37 | 
             
            - excerpt now contains segments instead of strings
         | 
| 33 38 | 
             
            - buffer size setter
         | 
| 34 39 | 
             
            - default corpus moved to separate plugin (sudo gem install apohllo-poliqarpr-corpus)
         | 
| @@ -45,6 +50,6 @@ | |
| 45 50 | 
             
            - README.txt included in gem
         | 
| 46 51 | 
             
            - specs included in gem
         | 
| 47 52 |  | 
| 48 | 
            -
            0.0.1 | 
| 53 | 
            +
            0.0.1
         | 
| 49 54 | 
             
            - initial implementation
         | 
| 50 55 | 
             
            - synchronous querying for terms
         | 
    
        data/lib/poliqarpr.rb
    CHANGED
    
    
    
        data/lib/poliqarpr/client.rb
    CHANGED
    
    | @@ -3,61 +3,59 @@ module Poliqarp | |
| 3 3 | 
             
              # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
         | 
| 4 4 | 
             
              # License:: MIT License
         | 
| 5 5 | 
             
              #
         | 
| 6 | 
            -
              # This class is the implementation of the Poliqarp server client. | 
| 6 | 
            +
              # This class is the implementation of the Poliqarp server client.
         | 
| 7 7 | 
             
              class Client
         | 
| 8 | 
            -
                 | 
| 9 | 
            -
             | 
| 10 | 
            -
                # If debug is turned on, the communication between server and client 
         | 
| 8 | 
            +
                # If debug is turned on, the communication between server and client
         | 
| 11 9 | 
             
                # is logged to standard output.
         | 
| 12 10 | 
             
                attr_writer :debug
         | 
| 13 11 |  | 
| 14 | 
            -
                # The  | 
| 15 | 
            -
                 | 
| 16 | 
            -
                attr_writer :buffer_size
         | 
| 12 | 
            +
                # The configuration of the client.
         | 
| 13 | 
            +
                attr_reader :config
         | 
| 17 14 |  | 
| 18 | 
            -
                # Creates new poliqarp server client. | 
| 19 | 
            -
                # | 
| 15 | 
            +
                # Creates new poliqarp server client.
         | 
| 16 | 
            +
                #
         | 
| 20 17 | 
             
                # Parameters:
         | 
| 21 18 | 
             
                # * +session_name+ the name of the client session. Defaults to "RUBY".
         | 
| 22 19 | 
             
                # * +debug+ if set to true, all messages sent and received from server
         | 
| 23 20 | 
             
                #   are printed to standard output. Defaults to false.
         | 
| 24 21 | 
             
                def initialize(session_name="RUBY", debug=false)
         | 
| 25 22 | 
             
                  @session_name = session_name
         | 
| 26 | 
            -
                  @left_context = 5
         | 
| 27 | 
            -
                  @right_context = 5
         | 
| 28 23 | 
             
                  @debug = debug
         | 
| 29 | 
            -
                  @buffer_size = 500000
         | 
| 30 24 | 
             
                  @connector = Connector.new(debug)
         | 
| 25 | 
            +
                  @config = Config.new(self,500000)
         | 
| 31 26 | 
             
                  @answer_queue = Queue.new
         | 
| 27 | 
            +
                  @waiting_mutext = Mutex.new
         | 
| 32 28 | 
             
                  new_session
         | 
| 29 | 
            +
                  config.left_context_size = 5
         | 
| 30 | 
            +
                  config.right_context_size = 5
         | 
| 31 | 
            +
                  config.tags = []
         | 
| 32 | 
            +
                  config.lemmata = []
         | 
| 33 33 | 
             
                end
         | 
| 34 34 |  | 
| 35 35 | 
             
                # A hint about installation of default corpus gem
         | 
| 36 36 | 
             
                def self.const_missing(const)
         | 
| 37 | 
            -
                  if const.to_s =~ /DEFAULT_CORPUS/ | 
| 37 | 
            +
                  if const.to_s =~ /DEFAULT_CORPUS/
         | 
| 38 38 | 
             
                    raise "You need to install 'apohllo-poliqarpr-corpus' to use the default corpus"
         | 
| 39 39 | 
             
                  end
         | 
| 40 40 | 
             
                  super
         | 
| 41 41 | 
             
                end
         | 
| 42 42 |  | 
| 43 | 
            -
                # Creates new session for the client with the name given in constructor. | 
| 44 | 
            -
                # If the session was already opened, it is closed. | 
| 43 | 
            +
                # Creates new session for the client with the name given in constructor.
         | 
| 44 | 
            +
                # If the session was already opened, it is closed.
         | 
| 45 45 | 
             
                #
         | 
| 46 | 
            -
                # Parameters: | 
| 46 | 
            +
                # Parameters:
         | 
| 47 47 | 
             
                # * +port+ - the port on which the poliqarpd server is accepting connections (defaults to 4567)
         | 
| 48 48 | 
             
                def new_session(port=4567)
         | 
| 49 49 | 
             
                  close if @session
         | 
| 50 50 | 
             
                  @connector.open("localhost",port)
         | 
| 51 51 | 
             
                  talk("MAKE-SESSION #{@session_name}")
         | 
| 52 | 
            -
                  talk("BUFFER-RESIZE #{ | 
| 52 | 
            +
                  talk("BUFFER-RESIZE #{config.buffer_size}")
         | 
| 53 53 | 
             
                  @session = true
         | 
| 54 | 
            -
                  self.tags = {}
         | 
| 55 | 
            -
                  self.lemmata = {}
         | 
| 56 54 | 
             
                end
         | 
| 57 55 |  | 
| 58 56 | 
             
                # Closes the opened session.
         | 
| 59 57 | 
             
                def close
         | 
| 60 | 
            -
                  talk "CLOSE-SESSION" | 
| 58 | 
            +
                  talk "CLOSE-SESSION"
         | 
| 61 59 | 
             
                  @session = false
         | 
| 62 60 | 
             
                end
         | 
| 63 61 |  | 
| @@ -66,114 +64,36 @@ module Poliqarp | |
| 66 64 | 
             
                  talk "CLOSE"
         | 
| 67 65 | 
             
                end
         | 
| 68 66 |  | 
| 69 | 
            -
                # Sets the size of the left short context. It must be > 0
         | 
| 70 | 
            -
                #
         | 
| 71 | 
            -
                # The size of the left short context is the number 
         | 
| 72 | 
            -
                # of segments displayed in the found excerpts left to the
         | 
| 73 | 
            -
                # matched segment(s).
         | 
| 74 | 
            -
                def left_context=(value)
         | 
| 75 | 
            -
                  if correct_context_value?(value) 
         | 
| 76 | 
            -
                    result = talk("SET left-context-width #{value}")
         | 
| 77 | 
            -
                    @left_context = value if result =~ /^R OK/
         | 
| 78 | 
            -
                  else
         | 
| 79 | 
            -
                    raise "Invalid argument: #{value}. It must be fixnum greater than 0."
         | 
| 80 | 
            -
                  end
         | 
| 81 | 
            -
                end
         | 
| 82 | 
            -
             | 
| 83 | 
            -
                # Sets the size of the right short context. It must be > 0
         | 
| 84 | 
            -
                #
         | 
| 85 | 
            -
                # The size of the right short context is the number 
         | 
| 86 | 
            -
                # of segments displayed in the found excerpts right to the
         | 
| 87 | 
            -
                # matched segment(s).
         | 
| 88 | 
            -
                def right_context=(value)
         | 
| 89 | 
            -
                  if correct_context_value?(value)
         | 
| 90 | 
            -
                    result = talk("SET right-context-width #{value}")
         | 
| 91 | 
            -
                    @right_context = value if result =~ /^R OK/
         | 
| 92 | 
            -
                  else
         | 
| 93 | 
            -
                    raise "Invalid argument: #{value}. It must be fixnum greater than 0."
         | 
| 94 | 
            -
                  end
         | 
| 95 | 
            -
                end
         | 
| 96 | 
            -
             | 
| 97 | 
            -
                # Sets the tags' flags. There are four groups of segments 
         | 
| 98 | 
            -
                # which the flags apply for:
         | 
| 99 | 
            -
                # * +left_context+
         | 
| 100 | 
            -
                # * +left_match+
         | 
| 101 | 
            -
                # * +right_match+
         | 
| 102 | 
            -
                # * +right_context+
         | 
| 103 | 
            -
                #
         | 
| 104 | 
            -
                # If the flag for given group is set to true, all segments 
         | 
| 105 | 
            -
                # in the group are annotated with grammatical tags. E.g.:
         | 
| 106 | 
            -
                #  c.find("kot")
         | 
| 107 | 
            -
                #  ...
         | 
| 108 | 
            -
                #  "kot" tags: "subst:sg:nom:m2"
         | 
| 109 | 
            -
                #
         | 
| 110 | 
            -
                # You can pass :all to turn on flags for all groups
         | 
| 111 | 
            -
                def tags=(options={})
         | 
| 112 | 
            -
                  options = set_all_flags if options == :all
         | 
| 113 | 
            -
                  @tag_flags = options
         | 
| 114 | 
            -
                  flags = ""
         | 
| 115 | 
            -
                  GROUPS.each do |flag|
         | 
| 116 | 
            -
                    flags << (options[flag] ? "1" : "0")
         | 
| 117 | 
            -
                    end
         | 
| 118 | 
            -
                  talk("SET retrieve-tags #{flags}")
         | 
| 119 | 
            -
                end
         | 
| 120 | 
            -
             | 
| 121 | 
            -
                # Sets the lemmatas' flags. There are four groups of segments 
         | 
| 122 | 
            -
                # which the flags apply for:
         | 
| 123 | 
            -
                # * +left_context+
         | 
| 124 | 
            -
                # * +left_match+
         | 
| 125 | 
            -
                # * +right_match+
         | 
| 126 | 
            -
                # * +right_context+
         | 
| 127 | 
            -
                #
         | 
| 128 | 
            -
                # If the flag for given group is set to true, all segments 
         | 
| 129 | 
            -
                # in the group are returned with the base form of the lemmata. E.g.:
         | 
| 130 | 
            -
                #  c.find("kotu")
         | 
| 131 | 
            -
                #  ...
         | 
| 132 | 
            -
                #  "kotu" base_form: "kot"
         | 
| 133 | 
            -
                #
         | 
| 134 | 
            -
                # You can pass :all to turn on flags for all groups
         | 
| 135 | 
            -
                def lemmata=(options={})
         | 
| 136 | 
            -
                  options = set_all_flags if options == :all
         | 
| 137 | 
            -
                  @lemmata_flags = options
         | 
| 138 | 
            -
                  flags = ""
         | 
| 139 | 
            -
                  GROUPS.each do |flag|
         | 
| 140 | 
            -
                    flags << (options[flag] ? "1" : "0")
         | 
| 141 | 
            -
                    end
         | 
| 142 | 
            -
                  talk("SET retrieve-lemmata #{flags}")
         | 
| 143 | 
            -
                end
         | 
| 144 | 
            -
             | 
| 145 67 | 
             
                # *Asynchronous* Opens the corpus given as +path+. To open the default
         | 
| 146 | 
            -
                # corpus pass +:default+ as the argument. | 
| 147 | 
            -
                # | 
| 68 | 
            +
                # corpus pass +:default+ as the argument.
         | 
| 69 | 
            +
                #
         | 
| 148 70 | 
             
                # If you don't want to wait until the call is finished, you
         | 
| 149 71 | 
             
                # have to provide +handler+ for the asynchronous answer.
         | 
| 150 72 | 
             
                def open_corpus(path, &handler)
         | 
| 151 73 | 
             
                  if path == :default
         | 
| 152 74 | 
             
                    open_corpus(DEFAULT_CORPUS, &handler)
         | 
| 153 75 | 
             
                  else
         | 
| 154 | 
            -
                     | 
| 155 | 
            -
                    talk("OPEN #{path}", :async, &real_handler)
         | 
| 156 | 
            -
                    do_wait if handler.nil?
         | 
| 76 | 
            +
                    talk("OPEN #{path}", :async, &handler)
         | 
| 157 77 | 
             
                  end
         | 
| 158 78 | 
             
                end
         | 
| 159 79 |  | 
| 160 80 | 
             
                # Server diagnostics -- the result should be :pong
         | 
| 161 | 
            -
                def ping | 
| 81 | 
            +
                def ping
         | 
| 162 82 | 
             
                  :pong if talk("PING") =~ /PONG/
         | 
| 163 83 | 
             
                end
         | 
| 164 84 |  | 
| 165 85 | 
             
                # Returns server version
         | 
| 166 | 
            -
                def version | 
| 86 | 
            +
                def version
         | 
| 167 87 | 
             
                  talk("VERSION")
         | 
| 168 88 | 
             
                end
         | 
| 169 89 |  | 
| 170 90 | 
             
                # Returns corpus statistics:
         | 
| 171 | 
            -
                # * +:segment_tokens+ the number of segments in the corpus | 
| 91 | 
            +
                # * +:segment_tokens+ the number of segments in the corpus
         | 
| 172 92 | 
             
                #   (two segments which look exactly the same are counted separately)
         | 
| 173 93 | 
             
                # * +:segment_types+ the number of segment types in the corpus
         | 
| 174 94 | 
             
                #   (two segments which look exactly the same are counted as one type)
         | 
| 175 95 | 
             
                # * +:lemmata+ the number of lemmata (lexemes) types
         | 
| 176 | 
            -
                #   (all forms of inflected word, e.g. 'kot', 'kotu', ... | 
| 96 | 
            +
                #   (all forms of inflected word, e.g. 'kot', 'kotu', ...
         | 
| 177 97 | 
             
                #   are treated as one "word" -- lemmata)
         | 
| 178 98 | 
             
                # * +:tags+ the number of different grammar tags (each combination
         | 
| 179 99 | 
             
                #   of atomic tags is treated as different "tag")
         | 
| @@ -181,7 +101,7 @@ module Poliqarp | |
| 181 101 | 
             
                  stats = {}
         | 
| 182 102 | 
             
                  talk("CORPUS-STATS").split.each_with_index do |value, index|
         | 
| 183 103 | 
             
                    case index
         | 
| 184 | 
            -
                    when 1 | 
| 104 | 
            +
                    when 1
         | 
| 185 105 | 
             
                      stats[:segment_tokens] = value.to_i
         | 
| 186 106 | 
             
                    when 2
         | 
| 187 107 | 
             
                      stats[:segment_types] = value.to_i
         | 
| @@ -205,7 +125,7 @@ module Poliqarp | |
| 205 125 | 
             
                #   (each category has a list of its tags, eg. gender: m1 m2 m3 f n,
         | 
| 206 126 | 
             
                #   means that there are 5 genders: masculine(1,2,3), feminine and neuter)
         | 
| 207 127 | 
             
                # * +:classes+ enlists grammatical tags used to describe it
         | 
| 208 | 
            -
                #   (each class has a list of tags used to describe it, eg. adj: degree | 
| 128 | 
            +
                #   (each class has a list of tags used to describe it, eg. adj: degree
         | 
| 209 129 | 
             
                #   gender case number, means that adjectives are described in terms
         | 
| 210 130 | 
             
                #   of degree, gender, case and number)
         | 
| 211 131 | 
             
                def tagset
         | 
| @@ -226,14 +146,14 @@ module Poliqarp | |
| 226 146 | 
             
                #
         | 
| 227 147 | 
             
                # Options:
         | 
| 228 148 | 
             
                # * +index+ the index of the (only one) result to be returned. The index is relative
         | 
| 229 | 
            -
                #   to the beginning of the query result. In normal case you should query the | 
| 149 | 
            +
                #   to the beginning of the query result. In normal case you should query the
         | 
| 230 150 | 
             
                #   corpus without specifying the index, to see what results are returned.
         | 
| 231 | 
            -
                #   Then you can use the index and the same query to retrieve one result. | 
| 151 | 
            +
                #   Then you can use the index and the same query to retrieve one result.
         | 
| 232 152 | 
             
                #   The pair (query, index) is a kind of unique identifier of the excerpt.
         | 
| 233 153 | 
             
                # * +page_size+ the size of the page of results. If the page size is 0, then
         | 
| 234 154 | 
             
                #   all results are returned on one page. It is ignored if the +index+ option
         | 
| 235 155 | 
             
                #   is present. Defaults to 0.
         | 
| 236 | 
            -
                # * +page_index+ the index of the page of results (the first page has index 1, not 0). | 
| 156 | 
            +
                # * +page_index+ the index of the page of results (the first page has index 1, not 0).
         | 
| 237 157 | 
             
                #   It is ignored if the +index+ option is present. Defaults to 1.
         | 
| 238 158 | 
             
                def find(query,options={})
         | 
| 239 159 | 
             
                  if options[:index]
         | 
| @@ -243,11 +163,11 @@ module Poliqarp | |
| 243 163 | 
             
                  end
         | 
| 244 164 | 
             
                end
         | 
| 245 165 |  | 
| 246 | 
            -
                alias query find | 
| 166 | 
            +
                alias query find
         | 
| 247 167 |  | 
| 248 168 | 
             
                # Returns the number of results for given query.
         | 
| 249 169 | 
             
                def count(query)
         | 
| 250 | 
            -
                  count_results(make_query(query)) | 
| 170 | 
            +
                  count_results(make_query(query))
         | 
| 251 171 | 
             
                end
         | 
| 252 172 |  | 
| 253 173 | 
             
                # Returns the long context of the excerpt which is identified by
         | 
| @@ -257,13 +177,13 @@ module Poliqarp | |
| 257 177 | 
             
                  result = []
         | 
| 258 178 | 
             
                  talk "GET-CONTEXT #{index}"
         | 
| 259 179 | 
             
                  # 1st part
         | 
| 260 | 
            -
                  result << read_word | 
| 180 | 
            +
                  result << read_word
         | 
| 261 181 | 
             
                  # 2nd part
         | 
| 262 | 
            -
                  result << read_word | 
| 182 | 
            +
                  result << read_word
         | 
| 263 183 | 
             
                  # 3rd part
         | 
| 264 | 
            -
                  result << read_word | 
| 184 | 
            +
                  result << read_word
         | 
| 265 185 | 
             
                  # 4th part
         | 
| 266 | 
            -
                  result << read_word | 
| 186 | 
            +
                  result << read_word
         | 
| 267 187 | 
             
                  result
         | 
| 268 188 | 
             
                end
         | 
| 269 189 |  | 
| @@ -286,19 +206,62 @@ module Poliqarp | |
| 286 206 | 
             
                end
         | 
| 287 207 |  | 
| 288 208 | 
             
            protected
         | 
| 209 | 
            +
                # Set the size of the left context.
         | 
| 210 | 
            +
                def left_context=(value)
         | 
| 211 | 
            +
                  result = talk("SET left-context-width #{value}")
         | 
| 212 | 
            +
                  unless result =~ /^OK/
         | 
| 213 | 
            +
                    raise "Failed to set left context to #{value}: #{result}"
         | 
| 214 | 
            +
                  end
         | 
| 215 | 
            +
                end
         | 
| 216 | 
            +
             | 
| 217 | 
            +
                # Set the size of the right context.
         | 
| 218 | 
            +
                def right_context=(value)
         | 
| 219 | 
            +
                  result = talk("SET right-context-width #{value}")
         | 
| 220 | 
            +
                  unless result =~ /^OK/
         | 
| 221 | 
            +
                    raise "Failed to set right context to #{value}: #{result}"
         | 
| 222 | 
            +
                  end
         | 
| 223 | 
            +
                end
         | 
| 224 | 
            +
             | 
| 225 | 
            +
                # Sets the 'retrieve-tags' flags.
         | 
| 226 | 
            +
                def retrieve_tags(flags)
         | 
| 227 | 
            +
                  talk("SET retrieve-tags #{flags}")
         | 
| 228 | 
            +
                end
         | 
| 229 | 
            +
             | 
| 230 | 
            +
                # Sets the 'retrieve-lemmata' flags.
         | 
| 231 | 
            +
                def retrieve_lemmata(flags)
         | 
| 232 | 
            +
                  talk("SET retrieve-lemmata #{flags}")
         | 
| 233 | 
            +
                end
         | 
| 234 | 
            +
             | 
| 235 | 
            +
             | 
| 289 236 | 
             
                # Sends a message directly to the server
         | 
| 290 237 | 
             
                # * +msg+ the message to send
         | 
| 291 238 | 
             
                # * +mode+ if set to :sync, the method blocks until the message
         | 
| 292 239 | 
             
                #   is received. If :async the method returns immediately.
         | 
| 293 240 | 
             
                #   Default: :sync
         | 
| 294 | 
            -
                # * +handler+ the handler of the assynchronous message. | 
| 241 | 
            +
                # * +handler+ the handler of the assynchronous message.
         | 
| 295 242 | 
             
                #   It is ignored when the mode is set to :sync.
         | 
| 296 243 | 
             
                def talk(msg, mode = :sync, &handler)
         | 
| 297 244 | 
             
                  puts msg if @debug
         | 
| 298 | 
            -
                   | 
| 245 | 
            +
                  if mode == :sync
         | 
| 246 | 
            +
                    @connector.send_message(msg, mode, &handler)
         | 
| 247 | 
            +
                  else
         | 
| 248 | 
            +
                    if handler.nil?
         | 
| 249 | 
            +
                      real_handler = lambda do |msg|
         | 
| 250 | 
            +
                        @answer_queue.push msg
         | 
| 251 | 
            +
                        stop_waiting
         | 
| 252 | 
            +
                      end
         | 
| 253 | 
            +
                      start_waiting
         | 
| 254 | 
            +
                    else
         | 
| 255 | 
            +
                      real_handler = handler
         | 
| 256 | 
            +
                    end
         | 
| 257 | 
            +
                    @connector.send_message(msg, mode, &real_handler)
         | 
| 258 | 
            +
                    if handler.nil?
         | 
| 259 | 
            +
                      do_wait
         | 
| 260 | 
            +
                    end
         | 
| 261 | 
            +
                  end
         | 
| 299 262 | 
             
                end
         | 
| 300 263 |  | 
| 301 | 
            -
                # Make query and retrieve many results. | 
| 264 | 
            +
                # Make query and retrieve many results.
         | 
| 302 265 | 
             
                # * +query+ the query to be sent to the server.
         | 
| 303 266 | 
             
                # * +options+ see find
         | 
| 304 267 | 
             
                def find_many(query, options)
         | 
| @@ -308,7 +271,7 @@ protected | |
| 308 271 | 
             
                  answer_offset = page_size * (page_index - 1)
         | 
| 309 272 | 
             
                  if page_size > 0
         | 
| 310 273 | 
             
                    result_count = make_async_query(query,answer_offset)
         | 
| 311 | 
            -
                    answers_limit = answer_offset + page_size > result_count ? | 
| 274 | 
            +
                    answers_limit = answer_offset + page_size > result_count ?
         | 
| 312 275 | 
             
                      result_count - answer_offset : page_size
         | 
| 313 276 | 
             
                  else
         | 
| 314 277 | 
             
                    # all answers needed -- the call must be synchronous
         | 
| @@ -321,12 +284,12 @@ protected | |
| 321 284 |  | 
| 322 285 | 
             
                  result = QueryResult.new(page_index, page_count,page_size,self,query)
         | 
| 323 286 | 
             
                  if answers_limit > 0
         | 
| 324 | 
            -
                    talk("GET-RESULTS #{answer_offset} #{answer_offset + answers_limit - 1}") | 
| 287 | 
            +
                    talk("GET-RESULTS #{answer_offset} #{answer_offset + answers_limit - 1}")
         | 
| 325 288 | 
             
                    answers_limit.times do |answer_index|
         | 
| 326 289 | 
             
                      result << fetch_result(answer_offset + answer_index, query)
         | 
| 327 290 | 
             
                    end
         | 
| 328 291 | 
             
                  end
         | 
| 329 | 
            -
                  result | 
| 292 | 
            +
                  result
         | 
| 330 293 | 
             
                end
         | 
| 331 294 |  | 
| 332 295 | 
             
                # Make query and retrieve only one result
         | 
| @@ -334,13 +297,13 @@ protected | |
| 334 297 | 
             
                # * +index+ the index of the answer to be retrieved
         | 
| 335 298 | 
             
                def find_one(query,index)
         | 
| 336 299 | 
             
                  make_async_query(query,index)
         | 
| 337 | 
            -
                  talk("GET-RESULTS #{index} #{index}") | 
| 338 | 
            -
                  fetch_result(index,query) | 
| 300 | 
            +
                  talk("GET-RESULTS #{index} #{index}")
         | 
| 301 | 
            +
                  fetch_result(index,query)
         | 
| 339 302 | 
             
                end
         | 
| 340 303 |  | 
| 341 304 | 
             
                # Fetches one result of the query
         | 
| 342 305 | 
             
                #
         | 
| 343 | 
            -
                # MAKE-QUERY and GET-RESULTS must be sent to the server before | 
| 306 | 
            +
                # MAKE-QUERY and GET-RESULTS must be sent to the server before
         | 
| 344 307 | 
             
                # this method is called
         | 
| 345 308 | 
             
                def fetch_result(index, query)
         | 
| 346 309 | 
             
                  result = Excerpt.new(index, self, query)
         | 
| @@ -357,15 +320,15 @@ protected | |
| 357 320 | 
             
                  segments = []
         | 
| 358 321 | 
             
                  size.times do |segment_index|
         | 
| 359 322 | 
             
                    segment = Segment.new(read_word)
         | 
| 360 | 
            -
                    segments << segment | 
| 361 | 
            -
                    if  | 
| 323 | 
            +
                    segments << segment
         | 
| 324 | 
            +
                    if config.lemmata.include?(group) || config.tags.include?(group)
         | 
| 362 325 | 
             
                      lemmata_size = read_number()
         | 
| 363 | 
            -
                      lemmata_size.times do |lemmata_index| | 
| 326 | 
            +
                      lemmata_size.times do |lemmata_index|
         | 
| 364 327 | 
             
                        lemmata = Lemmata.new()
         | 
| 365 | 
            -
                        if  | 
| 328 | 
            +
                        if config.lemmata.include?(group)
         | 
| 366 329 | 
             
                          lemmata.base_form = read_word
         | 
| 367 330 | 
             
                        end
         | 
| 368 | 
            -
                        if  | 
| 331 | 
            +
                        if config.tags.include?(group)
         | 
| 369 332 | 
             
                          lemmata.tags = read_word
         | 
| 370 333 | 
             
                        end
         | 
| 371 334 | 
             
                        segment.lemmata << lemmata
         | 
| @@ -377,7 +340,9 @@ protected | |
| 377 340 |  | 
| 378 341 | 
             
                # Reads number stored in the message received from the server.
         | 
| 379 342 | 
             
                def read_number
         | 
| 380 | 
            -
                  @connector.read_message | 
| 343 | 
            +
                  msg = @connector.read_message
         | 
| 344 | 
            +
                  puts "XXX #{msg}" if @debug
         | 
| 345 | 
            +
                  msg.match(/\d+/)[0].to_i
         | 
| 381 346 | 
             
                end
         | 
| 382 347 |  | 
| 383 348 | 
             
                # Counts number of results for given answer
         | 
| @@ -387,25 +352,22 @@ protected | |
| 387 352 |  | 
| 388 353 | 
             
                # *Asynchronous* Sends the query to the server
         | 
| 389 354 | 
             
                # * +query+ query to send
         | 
| 390 | 
            -
                # * +handler+ if given, the method returns immediately, | 
| 355 | 
            +
                # * +handler+ if given, the method returns immediately,
         | 
| 391 356 | 
             
                #   and the answer is sent to the handler. In this case
         | 
| 392 357 | 
             
                #   the result returned by make_query should be IGNORED!
         | 
| 393 358 | 
             
                def make_query(query, &handler)
         | 
| 394 359 | 
             
                  if @last_query != query
         | 
| 395 360 | 
             
                    @last_query = query
         | 
| 396 | 
            -
                    if handler.nil?
         | 
| 397 | 
            -
                      real_handler = lambda { |msg| @answer_queue.push msg }
         | 
| 398 | 
            -
                    else
         | 
| 399 | 
            -
                      real_handler = handler
         | 
| 400 | 
            -
                    end
         | 
| 401 361 | 
             
                    begin
         | 
| 402 362 | 
             
                      talk("MAKE-QUERY #{query}")
         | 
| 403 363 | 
             
                    rescue JobInProgress
         | 
| 404 364 | 
             
                      talk("CANCEL") rescue nil
         | 
| 405 365 | 
             
                      talk("MAKE-QUERY #{query}")
         | 
| 406 366 | 
             
                    end
         | 
| 407 | 
            -
                    talk("RUN-QUERY #{ | 
| 408 | 
            -
                     | 
| 367 | 
            +
                    result = talk("RUN-QUERY #{config.buffer_size}", :async, &handler)
         | 
| 368 | 
            +
                    if handler.nil?
         | 
| 369 | 
            +
                      @last_result = result
         | 
| 370 | 
            +
                    end
         | 
| 409 371 | 
             
                  end
         | 
| 410 372 | 
             
                  @last_result
         | 
| 411 373 | 
             
                end
         | 
| @@ -415,38 +377,55 @@ protected | |
| 415 377 | 
             
                  @connector.read_message
         | 
| 416 378 | 
             
                end
         | 
| 417 379 |  | 
| 418 | 
            -
            private | 
| 380 | 
            +
                private
         | 
| 381 | 
            +
                # Wait for the asynchronous answer, if some synchronous query
         | 
| 382 | 
            +
                # was sent without handler.
         | 
| 419 383 | 
             
                def do_wait
         | 
| 420 384 | 
             
                  loop {
         | 
| 421 | 
            -
                     | 
| 422 | 
            -
                    puts " | 
| 423 | 
            -
                    sleep 0. | 
| 385 | 
            +
                    break unless should_wait?
         | 
| 386 | 
            +
                    puts "WAITING" if @debug
         | 
| 387 | 
            +
                    sleep 0.1
         | 
| 424 388 | 
             
                  }
         | 
| 425 389 | 
             
                  @answer_queue.shift
         | 
| 426 390 | 
             
                end
         | 
| 427 391 |  | 
| 428 | 
            -
                 | 
| 429 | 
            -
             | 
| 430 | 
            -
                   | 
| 431 | 
            -
             | 
| 392 | 
            +
                # Stop waiting for the asynchronous answer.
         | 
| 393 | 
            +
                def stop_waiting
         | 
| 394 | 
            +
                  @waiting_mutext.synchronize {
         | 
| 395 | 
            +
                    @should_wait = false
         | 
| 396 | 
            +
                  }
         | 
| 397 | 
            +
                  puts "WAITING stopped" if @debug
         | 
| 398 | 
            +
                end
         | 
| 399 | 
            +
             | 
| 400 | 
            +
                # Check if the thread should still wait for the answer.
         | 
| 401 | 
            +
                def should_wait?
         | 
| 402 | 
            +
                  should_wait = nil
         | 
| 403 | 
            +
                  @waiting_mutext.synchronize {
         | 
| 404 | 
            +
                    should_wait = @should_wait
         | 
| 405 | 
            +
                  }
         | 
| 406 | 
            +
                  should_wait
         | 
| 432 407 | 
             
                end
         | 
| 433 | 
            -
             | 
| 434 | 
            -
                 | 
| 435 | 
            -
             | 
| 408 | 
            +
             | 
| 409 | 
            +
                # Start waiting for the answer.
         | 
| 410 | 
            +
                def start_waiting
         | 
| 411 | 
            +
                  @waiting_mutext.synchronize {
         | 
| 412 | 
            +
                    @should_wait = true
         | 
| 413 | 
            +
                  }
         | 
| 414 | 
            +
                  puts "WAITING started" if @debug
         | 
| 436 415 | 
             
                end
         | 
| 437 416 |  | 
| 438 | 
            -
                def make_async_query(query,answer_offset) | 
| 439 | 
            -
                   | 
| 440 | 
            -
                  # BUFFER-STATE call
         | 
| 441 | 
            -
                  make_query(query){|msg| }
         | 
| 442 | 
            -
                  result_count = 0 | 
| 443 | 
            -
                  begin | 
| 417 | 
            +
                def make_async_query(query,answer_offset)
         | 
| 418 | 
            +
                  start_waiting
         | 
| 419 | 
            +
                  # we access the result count through BUFFER-STATE call
         | 
| 420 | 
            +
                  make_query(query){|msg| stop_waiting}
         | 
| 421 | 
            +
                  result_count = 0
         | 
| 422 | 
            +
                  begin
         | 
| 444 423 | 
             
                    # the result count might not be exact!
         | 
| 445 424 | 
             
                    result_count = talk("BUFFER-STATE").split(" ")[2].to_i
         | 
| 446 | 
            -
                     | 
| 425 | 
            +
                    break unless should_wait?
         | 
| 447 426 | 
             
                  end while result_count < answer_offset
         | 
| 448 427 | 
             
                  @last_result = "OK #{result_count}"
         | 
| 449 428 | 
             
                  result_count
         | 
| 450 429 | 
             
                end
         | 
| 451 | 
            -
              end | 
| 430 | 
            +
              end
         | 
| 452 431 | 
             
            end
         | 
| @@ -0,0 +1,138 @@ | |
| 1 | 
            +
            # vim:encoding=utf-8
         | 
| 2 | 
            +
            module Poliqarp
         | 
| 3 | 
            +
              # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
         | 
| 4 | 
            +
              # License:: MIT License
         | 
| 5 | 
            +
              #
         | 
| 6 | 
            +
              # This class holds the configuration of the client.
         | 
| 7 | 
            +
              class Config
         | 
| 8 | 
            +
                GROUPS = [:left_context, :left_match, :right_match, :right_context]
         | 
| 9 | 
            +
                # The size of the buffer is the maximum number of excerpts which
         | 
| 10 | 
            +
                # are returned for single query.
         | 
| 11 | 
            +
                attr_accessor :buffer_size, :left_context_size, :right_context_size, :tags, :lemmata
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                def initialize(client,buffer_size)
         | 
| 14 | 
            +
                  @client = client
         | 
| 15 | 
            +
                  @buffer_size = buffer_size
         | 
| 16 | 
            +
                end
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                # Sets the size of the left short context. It must be > 0
         | 
| 19 | 
            +
                #
         | 
| 20 | 
            +
                # The size of the left short context is the number
         | 
| 21 | 
            +
                # of segments displayed in the found excerpts left to the
         | 
| 22 | 
            +
                # matched segment(s).
         | 
| 23 | 
            +
                def left_context_size=(value)
         | 
| 24 | 
            +
                  if correct_context_value?(value)
         | 
| 25 | 
            +
                    @client.send(:left_context=,value)
         | 
| 26 | 
            +
                    @left_context_size = value
         | 
| 27 | 
            +
                  else
         | 
| 28 | 
            +
                    raise "Invalid argument: #{value}. It must be fixnum greater than 0."
         | 
| 29 | 
            +
                  end
         | 
| 30 | 
            +
                end
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                # Sets the size of the right short context. It must be > 0
         | 
| 33 | 
            +
                #
         | 
| 34 | 
            +
                # The size of the right short context is the number
         | 
| 35 | 
            +
                # of segments displayed in the found excerpts right to the
         | 
| 36 | 
            +
                # matched segment(s).
         | 
| 37 | 
            +
                def right_context_size=(value)
         | 
| 38 | 
            +
                  if correct_context_value?(value)
         | 
| 39 | 
            +
                    @client.send(:right_context=,value)
         | 
| 40 | 
            +
                    @right_context_size = value
         | 
| 41 | 
            +
                  else
         | 
| 42 | 
            +
                    raise "Invalid argument: #{value}. It must be fixnum greater than 0."
         | 
| 43 | 
            +
                  end
         | 
| 44 | 
            +
                end
         | 
| 45 | 
            +
             | 
| 46 | 
            +
                # Sets the tags' flags. There are four groups of segments
         | 
| 47 | 
            +
                # which the flags apply for:
         | 
| 48 | 
            +
                # * +:left_context+
         | 
| 49 | 
            +
                # * +:left_match+
         | 
| 50 | 
            +
                # * +:right_match+
         | 
| 51 | 
            +
                # * +:right_context+
         | 
| 52 | 
            +
                #
         | 
| 53 | 
            +
                # If the flag for given group is present, all segments
         | 
| 54 | 
            +
                # in the group are annotated with grammatical tags. E.g.:
         | 
| 55 | 
            +
                #  c.find("kot")
         | 
| 56 | 
            +
                #  ...
         | 
| 57 | 
            +
                #  "kot" tags: "subst:sg:nom:m2"
         | 
| 58 | 
            +
                #
         | 
| 59 | 
            +
                # E.g. config.tags = [:left_context] will retrieve tags
         | 
| 60 | 
            +
                # only for the left context.
         | 
| 61 | 
            +
                #
         | 
| 62 | 
            +
                # You can pass :all to turn on flags for all groups, i.e.
         | 
| 63 | 
            +
                # config.tags = :all will retrieve tags for all groups.
         | 
| 64 | 
            +
                def tags=(groups)
         | 
| 65 | 
            +
                  if groups == :all
         | 
| 66 | 
            +
                    @tags = GROUPS.dup
         | 
| 67 | 
            +
                  else
         | 
| 68 | 
            +
                    @tags = groups
         | 
| 69 | 
            +
                  end
         | 
| 70 | 
            +
                  @client.send(:retrieve_tags, flags_for(@tags))
         | 
| 71 | 
            +
                end
         | 
| 72 | 
            +
             | 
| 73 | 
            +
                # Sets the lemmatas' flags. There are four groups of segments
         | 
| 74 | 
            +
                # which the flags apply for:
         | 
| 75 | 
            +
                # * +left_context+
         | 
| 76 | 
            +
                # * +left_match+
         | 
| 77 | 
            +
                # * +right_match+
         | 
| 78 | 
            +
                # * +right_context+
         | 
| 79 | 
            +
                #
         | 
| 80 | 
            +
                # If the flag for given group is present, all segments
         | 
| 81 | 
            +
                # in the group are returned with the base form of the lemmata. E.g.:
         | 
| 82 | 
            +
                #  c.find("kotu")
         | 
| 83 | 
            +
                #  ...
         | 
| 84 | 
            +
                #  "kotu" base_form: "kot"
         | 
| 85 | 
            +
                #
         | 
| 86 | 
            +
                # E.g. config.lemmata = [:left_context] will retrieve lemmata
         | 
| 87 | 
            +
                # only for the left context.
         | 
| 88 | 
            +
                #
         | 
| 89 | 
            +
                # You can pass :all to turn on flags for all groups, i.e.
         | 
| 90 | 
            +
                # config.lemmata = :all will retrieve lemmata for all groups.
         | 
| 91 | 
            +
                def lemmata=(groups)
         | 
| 92 | 
            +
                  if groups == :all
         | 
| 93 | 
            +
                    @lemmata = GROUPS.dup
         | 
| 94 | 
            +
                  else
         | 
| 95 | 
            +
                    @lemmata = groups
         | 
| 96 | 
            +
                  end
         | 
| 97 | 
            +
                  @client.send(:retrieve_lemmata, flags_for(@lemmata))
         | 
| 98 | 
            +
                end
         | 
| 99 | 
            +
             | 
| 100 | 
            +
                # Allow for accessing individual group tags/lemmata flag,
         | 
| 101 | 
            +
                # e.g. config.left_context_tags, config.left_context_lemmata
         | 
| 102 | 
            +
                [:tags,:lemmata].each do |type|
         | 
| 103 | 
            +
                  GROUPS.each do |group|
         | 
| 104 | 
            +
                    define_method("#{group}_#{type}".to_sym) do
         | 
| 105 | 
            +
                      @tags.include?(group)
         | 
| 106 | 
            +
                    end
         | 
| 107 | 
            +
                  end
         | 
| 108 | 
            +
                end
         | 
| 109 | 
            +
             | 
| 110 | 
            +
                # Allow for changing individual group tags/lemmata flag,
         | 
| 111 | 
            +
                # e.g. config.left_context_tags = true, config.left_context_lemmata = true
         | 
| 112 | 
            +
                [:tags,:lemmata].each do |type|
         | 
| 113 | 
            +
                  GROUPS.each do |group|
         | 
| 114 | 
            +
                    define_method("#{group}_#{type}=".to_sym) do |value|
         | 
| 115 | 
            +
                      if value
         | 
| 116 | 
            +
                        @tags << group unless @tags.include?(group)
         | 
| 117 | 
            +
                      else
         | 
| 118 | 
            +
                        @tags.delete(group) if @tags.include?(group)
         | 
| 119 | 
            +
                      end
         | 
| 120 | 
            +
                      @client.send("retrieve_#{type}".to_sym, flags_for(@tags))
         | 
| 121 | 
            +
                    end
         | 
| 122 | 
            +
                  end
         | 
| 123 | 
            +
                end
         | 
| 124 | 
            +
             | 
| 125 | 
            +
                protected
         | 
| 126 | 
            +
                def correct_context_value?(value)
         | 
| 127 | 
            +
                  value.is_a?(Fixnum) && value > 0
         | 
| 128 | 
            +
                end
         | 
| 129 | 
            +
             | 
| 130 | 
            +
                def flags_for(elements)
         | 
| 131 | 
            +
                  flags = ""
         | 
| 132 | 
            +
                  GROUPS.each do |flag|
         | 
| 133 | 
            +
                    flags << (elements.include?(flag) ? "1" : "0")
         | 
| 134 | 
            +
                  end
         | 
| 135 | 
            +
                  flags
         | 
| 136 | 
            +
                end
         | 
| 137 | 
            +
              end
         | 
| 138 | 
            +
            end
         | 
    
        data/lib/poliqarpr/connector.rb
    CHANGED
    
    | @@ -44,7 +44,7 @@ module Poliqarp | |
| 44 44 | 
             
                  @debug = debug
         | 
| 45 45 | 
             
                end
         | 
| 46 46 |  | 
| 47 | 
            -
                # Opens connection with poliqarp server which runs | 
| 47 | 
            +
                # Opens connection with poliqarp server which runs
         | 
| 48 48 | 
             
                # on given +host+ and +port+.
         | 
| 49 49 | 
             
                def open(host,port)
         | 
| 50 50 | 
             
                  @socket_mutex.synchronize {
         | 
| @@ -60,17 +60,16 @@ module Poliqarp | |
| 60 60 | 
             
                  }
         | 
| 61 61 | 
             
                end
         | 
| 62 62 |  | 
| 63 | 
            -
                # Sends message to the poliqarp server. Returns the first synchronous | 
| 63 | 
            +
                # Sends message to the poliqarp server. Returns the first synchronous
         | 
| 64 64 | 
             
                # answer of the server.
         | 
| 65 65 | 
             
                # * +message+ the message to send
         | 
| 66 66 | 
             
                # * +mode+ synchronous (+:sync+) or asynchronous (+:async+)
         | 
| 67 67 | 
             
                # * +handler+ the handler of the asynchronous message
         | 
| 68 | 
            -
                def  | 
| 68 | 
            +
                def send_message(message, mode, &handler)
         | 
| 69 69 | 
             
                  puts "send #{mode} #{message}" if @debug
         | 
| 70 70 | 
             
                  if ruby19?
         | 
| 71 71 | 
             
                    massage = message.encode(UTF8)
         | 
| 72 72 | 
             
                  end
         | 
| 73 | 
            -
                  #@socket.puts(message)
         | 
| 74 73 | 
             
                  @socket.write(message+"\n")
         | 
| 75 74 | 
             
                  if mode == :async
         | 
| 76 75 | 
             
                    @handler = handler
         | 
| @@ -79,7 +78,7 @@ module Poliqarp | |
| 79 78 | 
             
                end
         | 
| 80 79 |  | 
| 81 80 | 
             
                # Retrieves one message from the server.
         | 
| 82 | 
            -
                # If the message indicates an error, new runtime error | 
| 81 | 
            +
                # If the message indicates an error, new runtime error
         | 
| 83 82 | 
             
                # containing the error description is returned.
         | 
| 84 83 | 
             
                def read_message
         | 
| 85 84 | 
             
                  message = @message_queue.shift
         | 
| @@ -94,7 +93,7 @@ module Poliqarp | |
| 94 93 |  | 
| 95 94 | 
             
            private
         | 
| 96 95 | 
             
                def main_loop
         | 
| 97 | 
            -
                  @loop = Thread.new { | 
| 96 | 
            +
                  @loop = Thread.new {
         | 
| 98 97 | 
             
                    loop {
         | 
| 99 98 | 
             
                      receive
         | 
| 100 99 | 
             
                      # XXX ??? needed
         | 
| @@ -124,8 +123,8 @@ private | |
| 124 123 |  | 
| 125 124 | 
             
                def receive_async(message)
         | 
| 126 125 | 
             
                  puts "receive async: #{message}" if @debug
         | 
| 127 | 
            -
                  Thread.new{ | 
| 128 | 
            -
                    @handler.call(message) | 
| 126 | 
            +
                  Thread.new{
         | 
| 127 | 
            +
                    @handler.call(message)
         | 
| 129 128 | 
             
                  }
         | 
| 130 129 | 
             
                end
         | 
| 131 130 |  | 
    
        data/lib/poliqarpr/exceptions.rb
    CHANGED
    
    | @@ -2,7 +2,7 @@ module Poliqarp | |
| 2 2 | 
             
              # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
         | 
| 3 3 | 
             
              # License:: MIT License
         | 
| 4 4 |  | 
| 5 | 
            -
              # The JobInProgress exception is raised if there was asynchronous call | 
| 5 | 
            +
              # The JobInProgress exception is raised if there was asynchronous call
         | 
| 6 6 | 
             
              # to the server which hasn't finished, which is interrupted by another
         | 
| 7 7 | 
             
              # asynchronous call.
         | 
| 8 8 | 
             
              class JobInProgress < Exception; end
         | 
    
        data/lib/poliqarpr/excerpt.rb
    CHANGED
    
    | @@ -2,13 +2,13 @@ module Poliqarp | |
| 2 2 | 
             
              # Author:: Aleksander Pohl
         | 
| 3 3 | 
             
              # License:: MIT License
         | 
| 4 4 | 
             
              #
         | 
| 5 | 
            -
              # The excerpt class is used to store single result of the query, | 
| 5 | 
            +
              # The excerpt class is used to store single result of the query,
         | 
| 6 6 | 
             
              # i.e. the excerpt of the corpus which contains the words which
         | 
| 7 | 
            -
              # the corpus was queried for. | 
| 7 | 
            +
              # the corpus was queried for.
         | 
| 8 8 | 
             
              #
         | 
| 9 9 | 
             
              # The excerpt is divided into groups, which contain segments,
         | 
| 10 | 
            -
              # which the texts in the corpus were divided for. | 
| 11 | 
            -
              # The first group is the left context, the second -- the matched | 
| 10 | 
            +
              # which the texts in the corpus were divided for.
         | 
| 11 | 
            +
              # The first group is the left context, the second -- the matched
         | 
| 12 12 | 
             
              # query, and the last -- the right context.
         | 
| 13 13 | 
             
              class Excerpt
         | 
| 14 14 | 
             
                attr_reader :index, :base_form, :short_context
         | 
| @@ -40,7 +40,7 @@ module Poliqarp | |
| 40 40 | 
             
                  @short_context[2]
         | 
| 41 41 | 
             
                end
         | 
| 42 42 |  | 
| 43 | 
            -
                # Returns the matched query as string | 
| 43 | 
            +
                # Returns the matched query as string
         | 
| 44 44 | 
             
                def word
         | 
| 45 45 | 
             
                  #@short_context[0].split(/\s+/)[-1]
         | 
| 46 46 | 
             
                  @short_context[1].map{|s| s.to_s}.join("")
         | 
| @@ -54,7 +54,7 @@ module Poliqarp | |
| 54 54 | 
             
                  @short_context.join("")
         | 
| 55 55 | 
             
                end
         | 
| 56 56 |  | 
| 57 | 
            -
                # Returns the long context of the query. | 
| 57 | 
            +
                # Returns the long context of the query.
         | 
| 58 58 | 
             
                def context
         | 
| 59 59 | 
             
                  return @context unless @context.nil?
         | 
| 60 60 | 
             
                  @context = @client.context(@base_form, @index)
         | 
| @@ -63,7 +63,7 @@ module Poliqarp | |
| 63 63 | 
             
                { :medium => :medium, :style => :styl, :date => :data_wydania,
         | 
| 64 64 | 
             
                  :city => :miejsce_wydania, :publisher => :wydawca, :title => :tytu,
         | 
| 65 65 | 
             
                  :author => :autor}.each do |method, keyword|
         | 
| 66 | 
            -
                  define_method method do | 
| 66 | 
            +
                  define_method method do
         | 
| 67 67 | 
             
                    self.metadata[keyword]
         | 
| 68 68 | 
             
                  end
         | 
| 69 69 | 
             
                  end
         | 
| @@ -2,9 +2,9 @@ module Poliqarp | |
| 2 2 | 
             
              # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
         | 
| 3 3 | 
             
              # License:: MIT License
         | 
| 4 4 | 
             
              #
         | 
| 5 | 
            -
              # The query result class is used to paginate results of the | 
| 5 | 
            +
              # The query result class is used to paginate results of the
         | 
| 6 6 | 
             
              # query. Each query result has information about its context
         | 
| 7 | 
            -
              # (the next and previous page). | 
| 7 | 
            +
              # (the next and previous page).
         | 
| 8 8 | 
             
              class QueryResult
         | 
| 9 9 | 
             
                include Enumerable
         | 
| 10 10 |  | 
| @@ -51,16 +51,16 @@ module Poliqarp | |
| 51 51 | 
             
                # Returns the previous page of the query result
         | 
| 52 52 | 
             
                def previous_page
         | 
| 53 53 | 
             
                  if @page > 1
         | 
| 54 | 
            -
                    @client.find(@query, :page_size => @page_size, | 
| 55 | 
            -
                                 :page_index => @page - 1) | 
| 54 | 
            +
                    @client.find(@query, :page_size => @page_size,
         | 
| 55 | 
            +
                                 :page_index => @page - 1)
         | 
| 56 56 | 
             
                  end
         | 
| 57 57 | 
             
                end
         | 
| 58 58 |  | 
| 59 59 | 
             
                # Return the next page of the query result
         | 
| 60 60 | 
             
                def next_page
         | 
| 61 61 | 
             
                  if @page < @page_count
         | 
| 62 | 
            -
                    @client.find(@query, :page_size => @page_size, | 
| 63 | 
            -
                                 :page_index => @page + 1) | 
| 62 | 
            +
                    @client.find(@query, :page_size => @page_size,
         | 
| 63 | 
            +
                                 :page_index => @page + 1)
         | 
| 64 64 | 
             
                  end
         | 
| 65 65 | 
             
                end
         | 
| 66 66 |  | 
    
        data/lib/poliqarpr/segment.rb
    CHANGED
    
    | @@ -1,22 +1,22 @@ | |
| 1 | 
            -
            module Poliqarp | 
| 1 | 
            +
            module Poliqarp
         | 
| 2 2 | 
             
              # Author:: Aleksander Pohl (mailto:apohllo@o2.pl)
         | 
| 3 3 | 
             
              # License:: MIT LICENSE
         | 
| 4 4 | 
             
              #
         | 
| 5 | 
            -
              # The segment is the smallest meaningful part of the text. | 
| 6 | 
            -
              # It may contain many lemmata, since the segments are sometimes | 
| 7 | 
            -
              # not disambiguated. | 
| 5 | 
            +
              # The segment is the smallest meaningful part of the text.
         | 
| 6 | 
            +
              # It may contain many lemmata, since the segments are sometimes
         | 
| 7 | 
            +
              # not disambiguated.
         | 
| 8 8 | 
             
              class Segment
         | 
| 9 9 | 
             
                attr_reader :literal, :lemmata
         | 
| 10 10 |  | 
| 11 | 
            -
                # Creates new segment. The specified argument is the literal | 
| 12 | 
            -
                # (as found in the text) representation of the segment. | 
| 11 | 
            +
                # Creates new segment. The specified argument is the literal
         | 
| 12 | 
            +
                # (as found in the text) representation of the segment.
         | 
| 13 13 | 
             
                def initialize(literal)
         | 
| 14 14 | 
             
                  @literal = literal
         | 
| 15 15 | 
             
                  @lemmata = []
         | 
| 16 16 | 
             
                end
         | 
| 17 17 |  | 
| 18 18 | 
             
                # Returns the segment literal
         | 
| 19 | 
            -
                def to_s | 
| 19 | 
            +
                def to_s
         | 
| 20 20 | 
             
                  @literal
         | 
| 21 21 | 
             
                end
         | 
| 22 22 | 
             
              end
         | 
    
        data/lib/poliqarpr/util.rb
    CHANGED
    
    
    
        data/poliqarpr.gemspec
    CHANGED
    
    | @@ -1,13 +1,13 @@ | |
| 1 1 | 
             
            Gem::Specification.new do |s|
         | 
| 2 2 | 
             
              s.name = "poliqarpr"
         | 
| 3 | 
            -
              s.version = "0.0.8" | 
| 4 | 
            -
              s.date = "2011-01- | 
| 3 | 
            +
              s.version = "0.1.0"
         | 
| 4 | 
            +
              s.date = "2011-01-17"
         | 
| 5 5 | 
             
              s.summary = "Ruby client for Poliqarp"
         | 
| 6 6 | 
             
              s.email = "apohllo@o2.pl"
         | 
| 7 7 | 
             
              s.homepage = "http://www.github.com/apohllo/poliqarpr"
         | 
| 8 8 | 
             
              s.description = "Ruby client for Poliqarp (NLP corpus server)"
         | 
| 9 9 | 
             
              s.authors = ['Aleksander Pohl']
         | 
| 10 | 
            -
              s.files = ["Rakefile", "poliqarpr.gemspec", | 
| 10 | 
            +
              s.files = ["Rakefile", "poliqarpr.gemspec",
         | 
| 11 11 | 
             
                "changelog.txt", "README.txt" ] + Dir.glob("lib/**/*")
         | 
| 12 12 | 
             
              s.test_files = Dir.glob("spec/**/*")
         | 
| 13 13 | 
             
              s.rdoc_options = ["--main", "README.txt"]
         | 
    
        data/spec/client.rb
    CHANGED
    
    | @@ -5,17 +5,17 @@ require 'poliqarpr' | |
| 5 5 | 
             
            describe Poliqarp::Client do
         | 
| 6 6 | 
             
              describe "(general test)" do
         | 
| 7 7 | 
             
                before(:each) do
         | 
| 8 | 
            -
                  @client = Poliqarp::Client.new(" | 
| 8 | 
            +
                  @client = Poliqarp::Client.new("TEST1")
         | 
| 9 9 | 
             
                end
         | 
| 10 | 
            -
             | 
| 11 | 
            -
                after(:each) do | 
| 10 | 
            +
             | 
| 11 | 
            +
                after(:each) do
         | 
| 12 12 | 
             
                  @client.close
         | 
| 13 13 | 
             
                end
         | 
| 14 | 
            -
             | 
| 14 | 
            +
             | 
| 15 15 | 
             
                it "should allow to open corpus" do
         | 
| 16 16 | 
             
                  @client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
         | 
| 17 17 | 
             
                end
         | 
| 18 | 
            -
             | 
| 18 | 
            +
             | 
| 19 19 | 
             
                it "should allow to open :default corpus" do
         | 
| 20 20 | 
             
                  @client.open_corpus(:default)
         | 
| 21 21 | 
             
                end
         | 
| @@ -32,7 +32,7 @@ describe Poliqarp::Client do | |
| 32 32 |  | 
| 33 33 | 
             
              describe "(with 'sample' corpus)" do
         | 
| 34 34 | 
             
                before(:all) do
         | 
| 35 | 
            -
                  @client = Poliqarp::Client.new(" | 
| 35 | 
            +
                  @client = Poliqarp::Client.new("TEST2")
         | 
| 36 36 | 
             
                  @client.open_corpus("/home/fox/local/poliqarp/2.sample.30/sample")
         | 
| 37 37 | 
             
                end
         | 
| 38 38 |  | 
| @@ -40,35 +40,37 @@ describe Poliqarp::Client do | |
| 40 40 | 
             
                  @client.close
         | 
| 41 41 | 
             
                end
         | 
| 42 42 |  | 
| 43 | 
            -
                it "should allow to set the right context size" do | 
| 44 | 
            -
                  @client. | 
| 43 | 
            +
                it "should allow to set and get the right context size" do
         | 
| 44 | 
            +
                  @client.config.right_context_size = 5
         | 
| 45 | 
            +
                  @client.config.right_context_size.should == 5
         | 
| 45 46 | 
             
                end
         | 
| 46 47 |  | 
| 47 | 
            -
                it "should raise error if the size of right context is not number" do | 
| 48 | 
            -
                  (proc do | 
| 49 | 
            -
                    @client. | 
| 48 | 
            +
                it "should raise error if the size of right context is not number" do
         | 
| 49 | 
            +
                  (proc do
         | 
| 50 | 
            +
                    @client.config.right_context_size = "a"
         | 
| 50 51 | 
             
                  end).should raise_error(RuntimeError)
         | 
| 51 52 | 
             
                end
         | 
| 52 53 |  | 
| 53 | 
            -
                it "should rais error if the size of right context is less or equal 0" do | 
| 54 | 
            -
                  (proc do | 
| 55 | 
            -
                    @client. | 
| 54 | 
            +
                it "should rais error if the size of right context is less or equal 0" do
         | 
| 55 | 
            +
                  (proc do
         | 
| 56 | 
            +
                    @client.config.right_context_size = 0
         | 
| 56 57 | 
             
                  end).should raise_error(RuntimeError)
         | 
| 57 58 | 
             
                end
         | 
| 58 59 |  | 
| 59 | 
            -
                it "should allow to set the left context size" do | 
| 60 | 
            -
                  @client. | 
| 60 | 
            +
                it "should allow to set and get the left context size" do
         | 
| 61 | 
            +
                  @client.config.left_context_size = 5
         | 
| 62 | 
            +
                  @client.config.left_context_size.should == 5
         | 
| 61 63 | 
             
                end
         | 
| 62 64 |  | 
| 63 | 
            -
                it "should raise error if the size of left context is not number" do | 
| 64 | 
            -
                  (lambda do | 
| 65 | 
            -
                    @client. | 
| 65 | 
            +
                it "should raise error if the size of left context is not number" do
         | 
| 66 | 
            +
                  (lambda do
         | 
| 67 | 
            +
                    @client.config.left_context_size = "a"
         | 
| 66 68 | 
             
                  end).should raise_error(RuntimeError)
         | 
| 67 69 | 
             
                end
         | 
| 68 70 |  | 
| 69 | 
            -
                it "should rais error if the size of left context is less or equal 0" do | 
| 70 | 
            -
                  (lambda do | 
| 71 | 
            -
                    @client. | 
| 71 | 
            +
                it "should rais error if the size of left context is less or equal 0" do
         | 
| 72 | 
            +
                  (lambda do
         | 
| 73 | 
            +
                    @client.config.left_context_size = 0
         | 
| 72 74 | 
             
                  end).should raise_error(RuntimeError)
         | 
| 73 75 | 
             
                end
         | 
| 74 76 |  | 
| @@ -87,7 +89,7 @@ describe Poliqarp::Client do | |
| 87 89 | 
             
                  tagset[:classes].should_not == nil
         | 
| 88 90 | 
             
                end
         | 
| 89 91 |  | 
| 90 | 
            -
                it "should allow to find 'kot'" do | 
| 92 | 
            +
                it "should allow to find 'kot'" do
         | 
| 91 93 | 
             
                  @client.find("kot").size.should_not == 0
         | 
| 92 94 | 
             
                end
         | 
| 93 95 |  | 
| @@ -129,7 +131,7 @@ describe Poliqarp::Client do | |
| 129 131 | 
             
                end
         | 
| 130 132 |  | 
| 131 133 | 
             
                describe("(with index specified in find)") do
         | 
| 132 | 
            -
                  before(:each) do | 
| 134 | 
            +
                  before(:each) do
         | 
| 133 135 | 
             
                    @result = @client.find("nachalny",:index => 0)
         | 
| 134 136 | 
             
                  end
         | 
| 135 137 |  | 
| @@ -146,13 +148,12 @@ describe Poliqarp::Client do | |
| 146 148 | 
             
                  end
         | 
| 147 149 | 
             
                end
         | 
| 148 150 |  | 
| 149 | 
            -
                describe("(with lemmata flags set to true)") do | 
| 151 | 
            +
                describe("(with lemmata flags set to true)") do
         | 
| 150 152 | 
             
                  before(:all) do
         | 
| 151 | 
            -
                    @client.lemmata =  | 
| 152 | 
            -
                      :left_match => true, :right_match => true}
         | 
| 153 | 
            +
                    @client.config.lemmata = [:left_context, :right_context, :left_match, :right_match]
         | 
| 153 154 | 
             
                  end
         | 
| 154 155 |  | 
| 155 | 
            -
                  it "should allow to find 'kotu'" do | 
| 156 | 
            +
                  it "should allow to find 'kotu'" do
         | 
| 156 157 | 
             
                    @client.find("kotu").size.should_not == 0
         | 
| 157 158 | 
             
                  end
         | 
| 158 159 |  | 
    
        data/spec/excerpt.rb
    CHANGED
    
    | @@ -25,7 +25,7 @@ describe Poliqarp::Excerpt do | |
| 25 25 | 
             
                  @excerpt.index.should_not == nil
         | 
| 26 26 | 
             
                end
         | 
| 27 27 |  | 
| 28 | 
            -
                it "should have base form" do | 
| 28 | 
            +
                it "should have base form" do
         | 
| 29 29 | 
             
                  @excerpt.base_form.should_not == nil
         | 
| 30 30 | 
             
                end
         | 
| 31 31 |  | 
| @@ -64,10 +64,10 @@ describe Poliqarp::Excerpt do | |
| 64 64 | 
             
                end
         | 
| 65 65 |  | 
| 66 66 | 
             
                it "should have index set to 0" do
         | 
| 67 | 
            -
                  @excerpt.index.should == 0 | 
| 67 | 
            +
                  @excerpt.index.should == 0
         | 
| 68 68 | 
             
                end
         | 
| 69 69 |  | 
| 70 | 
            -
                it "should have base form set to 'kot'" do | 
| 70 | 
            +
                it "should have base form set to 'kot'" do
         | 
| 71 71 | 
             
                  @excerpt.base_form.should == "mu za to astronomiczną"
         | 
| 72 72 | 
             
                end
         | 
| 73 73 |  | 
| @@ -96,7 +96,7 @@ describe Poliqarp::Excerpt do | |
| 96 96 | 
             
                it "should have 'city' set to nil" do
         | 
| 97 97 | 
             
                  @excerpt.city.should == nil
         | 
| 98 98 | 
             
                end
         | 
| 99 | 
            -
             | 
| 99 | 
            +
             | 
| 100 100 | 
             
                it "should have one 'publisher' set to 'Wydawnictwo Naukowe Akademii Pedagogicznej'" do
         | 
| 101 101 | 
             
                  @excerpt.publisher.size.should == 1
         | 
| 102 102 | 
             
                  @excerpt.publisher[0].should == "Wydawnictwo W.A.B."
         | 
| @@ -114,13 +114,13 @@ describe Poliqarp::Excerpt do | |
| 114 114 | 
             
              end
         | 
| 115 115 |  | 
| 116 116 | 
             
              describe('first result for "kotu" with lemmatization turned on') do
         | 
| 117 | 
            -
                before(:all) do | 
| 118 | 
            -
                  @client.lemmata = :all | 
| 117 | 
            +
                before(:all) do
         | 
| 118 | 
            +
                  @client.config.lemmata = :all
         | 
| 119 119 | 
             
                  @client.open_corpus(:default)
         | 
| 120 | 
            -
                  @excerpt = @client.find("kotu")[0] | 
| 120 | 
            +
                  @excerpt = @client.find("kotu")[0]
         | 
| 121 121 | 
             
                end
         | 
| 122 122 |  | 
| 123 | 
            -
                it "should have one lemmata for each segment" do | 
| 123 | 
            +
                it "should have one lemmata for each segment" do
         | 
| 124 124 | 
             
                  @excerpt.short_context.each do |group|
         | 
| 125 125 | 
             
                    group.each do |segment|
         | 
| 126 126 | 
             
                      segment.lemmata.size.should == 1
         | 
| @@ -134,7 +134,7 @@ describe Poliqarp::Excerpt do | |
| 134 134 | 
             
                  end
         | 
| 135 135 | 
             
                end
         | 
| 136 136 |  | 
| 137 | 
            -
                it "should contain 'kot' as one of the lemmata" do | 
| 137 | 
            +
                it "should contain 'kot' as one of the lemmata" do
         | 
| 138 138 | 
             
                  @excerpt.short_context.flatten.
         | 
| 139 139 | 
             
                    any?{|s| s.lemmata[0].base_form == "kot"}.should == true
         | 
| 140 140 | 
             
                end
         | 
    
        data/spec/query_result.rb
    CHANGED
    
    | @@ -12,7 +12,7 @@ describe Poliqarp::QueryResult do | |
| 12 12 | 
             
                @client.close
         | 
| 13 13 | 
             
              end
         | 
| 14 14 |  | 
| 15 | 
            -
              describe "(for unspecified query)" do | 
| 15 | 
            +
              describe "(for unspecified query)" do
         | 
| 16 16 | 
             
                before(:all) do
         | 
| 17 17 | 
             
                  @result = @client.find("kita")
         | 
| 18 18 | 
             
                end
         | 
| @@ -57,11 +57,11 @@ describe Poliqarp::QueryResult do | |
| 57 57 | 
             
              end
         | 
| 58 58 |  | 
| 59 59 | 
             
              describe "(for 'kot' in :default corpus)" do
         | 
| 60 | 
            -
                before(:all) do | 
| 60 | 
            +
                before(:all) do
         | 
| 61 61 | 
             
                  @result = @client.find("kot")
         | 
| 62 62 | 
             
                end
         | 
| 63 63 |  | 
| 64 | 
            -
                it "should have size == 6" do | 
| 64 | 
            +
                it "should have size == 6" do
         | 
| 65 65 | 
             
                  @result.size.should == 6
         | 
| 66 66 | 
             
                end
         | 
| 67 67 |  | 
| @@ -83,11 +83,11 @@ describe Poliqarp::QueryResult do | |
| 83 83 | 
             
              end
         | 
| 84 84 |  | 
| 85 85 | 
             
              describe "(for 'kot' with page_size set to 5 in :default corpus)" do
         | 
| 86 | 
            -
                before(:all) do | 
| 86 | 
            +
                before(:all) do
         | 
| 87 87 | 
             
                  @result = @client.find("kot", :page_size => 5)
         | 
| 88 88 | 
             
                end
         | 
| 89 89 |  | 
| 90 | 
            -
                it "should have size == 5" do | 
| 90 | 
            +
                it "should have size == 5" do
         | 
| 91 91 | 
             
                  @result.size.should == 5
         | 
| 92 92 | 
             
                end
         | 
| 93 93 |  | 
| @@ -109,11 +109,11 @@ describe Poliqarp::QueryResult do | |
| 109 109 | 
             
              end
         | 
| 110 110 |  | 
| 111 111 | 
             
              describe "(next for 'kot' with page_size set to 5 in :default corpus)" do
         | 
| 112 | 
            -
                before(:all) do | 
| 112 | 
            +
                before(:all) do
         | 
| 113 113 | 
             
                  @result = @client.find("kot", :page_size => 5).next_page
         | 
| 114 114 | 
             
                end
         | 
| 115 115 |  | 
| 116 | 
            -
                it "should have size == 1" do | 
| 116 | 
            +
                it "should have size == 1" do
         | 
| 117 117 | 
             
                  @result.size.should == 1
         | 
| 118 118 | 
             
                end
         | 
| 119 119 |  | 
    
        metadata
    CHANGED
    
    | @@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version | |
| 4 4 | 
             
              prerelease: false
         | 
| 5 5 | 
             
              segments: 
         | 
| 6 6 | 
             
              - 0
         | 
| 7 | 
            +
              - 1
         | 
| 7 8 | 
             
              - 0
         | 
| 8 | 
            -
              - 8 | 
| 9 | 
            -
              version: 0.0.8
         | 
| 9 | 
            +
              version: 0.1.0
         | 
| 10 10 | 
             
            platform: ruby
         | 
| 11 11 | 
             
            authors: 
         | 
| 12 12 | 
             
            - Aleksander Pohl
         | 
| @@ -14,7 +14,7 @@ autorequire: | |
| 14 14 | 
             
            bindir: bin
         | 
| 15 15 | 
             
            cert_chain: []
         | 
| 16 16 |  | 
| 17 | 
            -
            date: 2011-01- | 
| 17 | 
            +
            date: 2011-01-17 00:00:00 +01:00
         | 
| 18 18 | 
             
            default_executable: 
         | 
| 19 19 | 
             
            dependencies: []
         | 
| 20 20 |  | 
| @@ -32,6 +32,7 @@ files: | |
| 32 32 | 
             
            - changelog.txt
         | 
| 33 33 | 
             
            - README.txt
         | 
| 34 34 | 
             
            - lib/poliqarpr.rb
         | 
| 35 | 
            +
            - lib/poliqarpr/config.rb
         | 
| 35 36 | 
             
            - lib/poliqarpr/exceptions.rb
         | 
| 36 37 | 
             
            - lib/poliqarpr/lemmata.rb
         | 
| 37 38 | 
             
            - lib/poliqarpr/query_result.rb
         |