lucid_works 0.6.29 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. data/.rvmrc +1 -1
  2. data/Rakefile +25 -0
  3. data/config/locales/en.yml +171 -83
  4. data/lib/lucid_works/associations/has_many.rb +2 -2
  5. data/lib/lucid_works/associations/has_one.rb +1 -1
  6. data/lib/lucid_works/associations/proxy.rb +3 -3
  7. data/lib/lucid_works/associations.rb +2 -2
  8. data/lib/lucid_works/base.rb +21 -48
  9. data/lib/lucid_works/collection/click.rb +17 -0
  10. data/lib/lucid_works/collection/settings.rb +0 -1
  11. data/lib/lucid_works/collection.rb +22 -3
  12. data/lib/lucid_works/crawler.rb +13 -0
  13. data/lib/lucid_works/datasource/history.rb +5 -9
  14. data/lib/lucid_works/datasource/status.rb +8 -11
  15. data/lib/lucid_works/datasource.rb +67 -32
  16. data/lib/lucid_works/datasource_property.rb +18 -0
  17. data/lib/lucid_works/datasource_type.rb +23 -0
  18. data/lib/lucid_works/exceptions.rb +1 -0
  19. data/lib/lucid_works/field.rb +43 -2
  20. data/lib/lucid_works/fieldtype.rb +28 -0
  21. data/lib/lucid_works/gem_version.rb +1 -1
  22. data/lib/lucid_works/jdbcdriver.rb +30 -0
  23. data/lib/lucid_works/role.rb +59 -0
  24. data/lib/lucid_works/schema/attribute.rb +86 -0
  25. data/lib/lucid_works/schema/boolean_attribute.rb +34 -0
  26. data/lib/lucid_works/schema/custom_attribute.rb +15 -0
  27. data/lib/lucid_works/schema/integer_attribute.rb +32 -0
  28. data/lib/lucid_works/schema/iso8601_attribute.rb +31 -0
  29. data/lib/lucid_works/schema/string_attribute.rb +22 -0
  30. data/lib/lucid_works/schema.rb +66 -97
  31. data/lib/lucid_works/server.rb +14 -0
  32. data/lib/lucid_works.rb +12 -0
  33. data/spec/fixtures/fake_file_ds_to_be_deleted/.gitkeep +0 -0
  34. data/spec/fixtures/fake_file_ds_to_be_updated/.gitkeep +0 -0
  35. data/spec/fixtures/fake_file_ds_to_get_index_of/.gitkeep +0 -0
  36. data/spec/fixtures/fake_file_ds_to_get_schedule_of/.gitkeep +0 -0
  37. data/spec/fixtures/fake_file_ds_to_get_status_of/.gitkeep +0 -0
  38. data/spec/fixtures/fake_file_ds_to_mess_with_job_of/.gitkeep +0 -0
  39. data/spec/fixtures/fake_file_ds_to_test_progress/.gitkeep +0 -0
  40. data/spec/lib/lucid_works/associations/has_many_spec.rb +4 -3
  41. data/spec/lib/lucid_works/associations/has_one_spec.rb +4 -3
  42. data/spec/lib/lucid_works/base_spec.rb +110 -62
  43. data/spec/lib/lucid_works/collection/activity/history_spec.rb +1 -1
  44. data/spec/lib/lucid_works/collection_spec.rb +17 -17
  45. data/spec/lib/lucid_works/datasource/history_spec.rb +4 -4
  46. data/spec/lib/lucid_works/datasource/status_spec.rb +7 -7
  47. data/spec/lib/lucid_works/datasource_spec.rb +9 -8
  48. data/spec/lib/lucid_works/field_spec.rb +101 -2
  49. data/spec/lib/lucid_works/fieldtype_spec.rb +156 -0
  50. data/spec/lib/lucid_works/schema/attribute_spec.rb +136 -0
  51. data/spec/lib/lucid_works/schema_spec.rb +53 -27
  52. data/spec/spec_helper.rb +3 -50
  53. data/spec/support/active_model_lint.rb +21 -0
  54. data/spec/support/lucid_works.rb +52 -0
  55. metadata +36 -2
@@ -0,0 +1,17 @@
1
+ require 'lucid_works/field'
2
+
3
+ module LucidWorks
4
+ class Collection
5
+ class Click < Base
6
+ self.singleton = true
7
+ belongs_to :collection
8
+
9
+ schema do
10
+ attribute :type, :string, :values => %w{ q c }
11
+ attributes :req, :q, :doc, :type => :string
12
+ attributes :qt, :ct, :hits, :pos, :type => :integer
13
+ attribute :buffering, :boolean
14
+ end
15
+ end
16
+ end
17
+ end
@@ -16,7 +16,6 @@ module LucidWorks
16
16
  # Indexing Settings
17
17
  attribute :unknown_type_handling, :string, :values => LucidWorks::Field::TYPES
18
18
  attribute :de_duplication, :string, :values => DEDUP_OPTIONS
19
- attribute :index_time_stopwords, :boolean
20
19
 
21
20
  # Querying Settings
22
21
  attribute :unsupervised_feedback_emphasis, :string, :values => FEEDBACK_EMPHASIS
@@ -3,8 +3,8 @@ module LucidWorks
3
3
  class Collection < Base
4
4
 
5
5
  belongs_to :server
6
- has_many :datasources, :fields, :activities
7
- has_one :info, :settings
6
+ has_many :datasources, :fields, :fieldtypes, :activities, :roles
7
+ has_one :info, :settings, :click
8
8
  has_one :index, :has_content => false
9
9
 
10
10
  schema do
@@ -16,7 +16,6 @@ module LucidWorks
16
16
  LOGS_COLLECTION_NAME = 'LucidWorksLogs'
17
17
 
18
18
  validates_presence_of :name
19
- validates_exclusion_of :name, :in => [LOGS_COLLECTION_NAME]
20
19
 
21
20
  def destroyable?
22
21
  true
@@ -113,5 +112,25 @@ module LucidWorks
113
112
  def autocomplete_activity
114
113
  prime_activities.detect{|act| act.type == 'autocomplete'}
115
114
  end
115
+
116
+ # URL of Solr's built-in admin page
117
+ def admin_url
118
+ "#{server.host}/solr/#{name}/admin/"
119
+ end
120
+
121
+ # We have to handle the jdbcdrivers model in a custom way, as a GET on .../jdbcdrivers returns a list of strings,
122
+ # but then you can actually do a DELETE on .../jdbcdrivers/name.
123
+ # For the DELETE reason we do use a LucidWorks::Base model, but we have to create them by hand.
124
+ def jdbcdrivers
125
+ driverlist = JSON.parse RestClient.get(self.uri + '/jdbcdrivers')
126
+ driverlist.map do |drivername|
127
+ Jdbcdriver.new(:name => drivername, :collection => self)
128
+ end
129
+ end
130
+
131
+ def jdbcdriver_classes
132
+ JSON.parse RestClient.get(uri + "/jdbcdrivers/classes")
133
+ end
134
+
116
135
  end
117
136
  end
@@ -3,6 +3,19 @@ module LucidWorks
3
3
  class Crawler < Base
4
4
  schema do
5
5
  primary_key :name
6
+ attribute :datasource_types, :custom
7
+ end
8
+
9
+ attr_reader :datasource_types
10
+
11
# Converts the raw attribute hashes supplied for this crawler into
# DatasourceType objects and stores them in @datasource_types.
#
# Each assignment replaces the stored list. (With `||=` only the first
# assignment took effect and later ones were silently dropped.) A nil
# argument is treated as an empty list instead of raising.
#
# @param array_of_hashes [Array<Hash>, nil] one attribute hash per datasource type
def datasource_types=(array_of_hashes)
  @datasource_types = (array_of_hashes || []).map do |dt_attrs|
    DatasourceType.new(dt_attrs)
  end
end
16
+
17
+ def datasource_type(type)
18
+ datasource_types.detect { |t| t.type == type }
6
19
  end
7
20
  end
8
21
  end
@@ -4,17 +4,13 @@ module LucidWorks
4
4
  class History < Base
5
5
  belongs_to :datasource
6
6
  self.collection_name = 'history' # i.e. not the plural 'histories'
7
-
8
- def doc_count
9
- numUpdated + numNew + numUnchanged
7
+ schema do
8
+ attributes :crawl_started, :crawl_stopped, :type => :iso8601
9
+ attributes :num_updated, :num_new, :num_unchanged, :type => :integer
10
10
  end
11
11
 
12
- def crawl_stopped
13
- Time.iso8601 crawlStopped
14
- end
15
-
16
- def crawl_started
17
- Time.iso8601 crawlStarted
12
+ def doc_count
13
+ num_updated + num_new + num_unchanged
18
14
  end
19
15
 
20
16
  def duration
@@ -10,9 +10,10 @@ module LucidWorks
10
10
  CRAWLSTATES = STOPPED_STATES + [ 'RUNNING' ] + POST_PROCESSING_STATES
11
11
 
12
12
  schema do
13
- attribute :crawlState, :string, :values => CRAWLSTATES
14
- attributes :crawlStarted, :crawlStopped, :jobId
15
- attributes :numUnchanged, :numUpdated, :numNew, :numFailed, :numDeleted, :numTotal, :type => :integer
13
+ attribute :crawl_state, :string, :values => CRAWLSTATES
14
+ attribute :jobId
15
+ attributes :crawl_started, :crawl_stopped, :type => :iso8601
16
+ attributes :num_unchanged, :num_updated, :num_new, :num_failed, :num_deleted, :num_total, :type => :integer
16
17
  end
17
18
 
18
19
  # Create predicate methods for all the crawl states
@@ -20,25 +21,21 @@ module LucidWorks
20
21
  method_name = state.downcase + "?"
21
22
  class_eval <<-EOF
22
23
  def #{method_name}
23
- self.crawlState == "#{state}"
24
+ self.crawl_state == "#{state}"
24
25
  end
25
26
  EOF
26
27
  end
27
28
 
28
29
  def stopped?
29
- STOPPED_STATES.include?(crawlState)
30
+ STOPPED_STATES.include?(crawl_state)
30
31
  end
31
32
 
32
33
  def post_processing?
33
- POST_PROCESSING_STATES.include?(crawlState)
34
+ POST_PROCESSING_STATES.include?(crawl_state)
34
35
  end
35
36
 
36
37
  def doc_count
37
- numUpdated + numNew + numUnchanged
38
- end
39
-
40
- def crawl_started
41
- Time.iso8601 crawlStarted
38
+ num_updated + num_new + num_unchanged
42
39
  end
43
40
 
44
41
  def elapsed_time
@@ -2,46 +2,96 @@ module LucidWorks
2
2
 
3
3
  class Datasource < Base
4
4
  belongs_to :collection
5
- has_many :histories, :class_name => :history, :supports_all => true
6
- has_one :status, :supports_all => true
5
+ has_many :histories, :class_name => :history, :retrieveable_en_masse => true
6
+ has_one :status, :retrieveable_en_masse => true
7
7
  has_one :schedule, :crawldata
8
8
  has_one :index, :job, :has_content => false
9
9
 
10
+ TYPES = %w{ external file ftp hdfs kfs lucidworkslogs web s3 s3n smb solrxml jdbc sharepoint }
11
+ SYSTEM_TYPES = %w{ lucidworkslogs }
12
+ BOUNDS = %w{ tree none }
13
+ CRAWLERS = {
14
+ # Later we may change these to be arrays if we decide to support more than one choice
15
+ # e.g. :web => ['lucid.aperture', 'nutch'], :file => ['lucid.aperture', 'lucid.fs']
16
+ :file => 'lucid.aperture',
17
+ :lucidworkslogs => 'lucid.logs',
18
+ :external => 'lucid.external',
19
+ :web => 'lucid.aperture',
20
+ :solrxml => 'lucid.solrxml',
21
+ :jdbc => 'lucid.jdbc',
22
+ :sharepoint => 'lucid.gcm',
23
+ :ftp => 'lucid.fs',
24
+ :hdfs => 'lucid.fs',
25
+ :kfs => 'lucid.fs',
26
+ :smb => 'lucid.fs',
27
+ :s3n => 'lucid.fs',
28
+ :s3 => 'lucid.fs'
29
+ }.with_indifferent_access
30
+
10
31
  schema do
11
32
  # common
12
- attributes :name, :type, :crawler
33
+ attributes :name, :crawler
34
+ attribute :type, :string, :values => TYPES
13
35
  attributes :crawl_depth, :max_docs, :type => :integer
14
- attributes :max_bytes, :commitWithin, :type => :integer, :omit_when_blank => true
36
+ attributes :max_bytes, :commit_within, :type => :integer, :omit_when_blank => true
15
37
  attribute :commit_within_min, :custom
38
+ attribute :commit_on_finish, :boolean
16
39
  attribute :include_paths
17
40
  attribute :exclude_paths
18
- attribute :mapping # Hash
19
- attribute :bounds
41
+ attribute :mapping, :string, :omit_when_blank => true # Hash
42
+ attribute :bounds, :string, :values => BOUNDS
20
43
  # web
21
44
  attributes :url, :category
22
45
  attribute :collect_links, :boolean
46
+ attribute :auth, :string, :omit_when_blank => true # Hash
47
+ attributes :proxy_host, :string, :omit_when_blank => true
48
+ attribute :proxy_port, :string, :omit_when_blank => true
49
+ attribute :proxy_username, :string, :omit_when_blank => true
50
+ attribute :proxy_password, :string, :omit_when_blank => true
51
+ attribute :ignore_robots, :boolean
23
52
  # file
24
53
  attribute :path
25
54
  attribute :follow_links, :boolean
55
+ attribute :parsing, :boolean
56
+ attribute :indexing, :boolean
57
+ attribute :caching, :boolean
58
+ attribute :verify_access, :boolean
59
+ attribute :log_extra_detail, :boolean
60
+ attribute :fail_unsupported_file_types, :boolean
61
+ attribute :warn_unknown_mime_types, :boolean
62
+ attribute :no_duplicates, :boolean
26
63
  # sharepoint
27
- attribute :sharepointUrl
28
- attribute :connectorType
64
+ attribute :sharepoint_url
65
+ attribute :connector_type
29
66
  attribute :authorization
30
67
  attribute :username
31
68
  attribute :password
32
69
  attribute :domain
33
- attribute :mySiteBaseURL, :string, :nil_when_blank => true
34
- attribute :includedURls
35
- attribute :excludedURls
70
+ attribute :my_site_base_url, :string, :nil_when_blank => true
71
+ attribute :included_urls
72
+ attribute :excluded_urls
36
73
  attribute :kdcserver
37
- attribute :useSPSearchVisibility, :boolean
74
+ attribute :use_sp_search_visibility, :boolean
38
75
  attribute :aliases
39
76
  # external
40
77
  attribute :source
41
78
  attribute :source_type
42
79
  # lucidworkslogs
43
- attribute :deleteAfter, :integer, :omit_when_blank => true
80
+ attribute :delete_after, :integer, :omit_when_blank => true
44
81
  attribute :delete_after_days, :custom
82
+ # smb
83
+ attribute :windows_domain
84
+ attributes :parsing, :indexing, :caching, :type => :boolean
85
+ # jdbc
86
+ attribute :driver
87
+ attribute :primary_key
88
+ attribute :sql_select_statement
89
+ attribute :delta_sql_query
90
+ attribute :nested_queries # list
91
+ # solrxml
92
+ attribute :file
93
+ attribute :include_datasource_metadata, :boolean
94
+ attribute :generate_unique_key, :boolean
45
95
  end
46
96
 
47
97
  validates_presence_of :type, :crawler, :name
@@ -49,33 +99,18 @@ module LucidWorks
49
99
  validates_numericality_of :max_bytes, :allow_blank => true
50
100
  validates_presence_of :url, :if => lambda { |d| d.type == 'web' }
51
101
 
52
- TYPES = %w{ external file lucidworkslogs web solrxml jdbc sharepoint }
53
- BOUNDS = %w{ tree none }
54
- CRAWLERS = {
55
- # Later we may change these to be arrays if we decide to support more than one choice
56
- # e.g. :web => ['lucid.aperture', 'nutch']
57
- :file => 'lucid.aperture',
58
- :lucidworkslogs => 'lucid.logs',
59
- :external => 'lucid.external',
60
- :web => 'lucid.aperture',
61
- :solrxml => 'lucid.solrxml',
62
- :jdbc => 'lucid.jdbc',
63
- :sharepoint => 'lucid.gcm'
64
- }.with_indifferent_access
65
-
66
-
67
102
  # Fake attributes to ease UI implementation
68
103
  def commit_within_min
69
- commitWithin.blank? ? nil : commitWithin / 1.second.milliseconds / 1.minute.seconds
104
+ commit_within.blank? ? nil : commit_within / 1.second.milliseconds / 1.minute.seconds
70
105
  end
71
106
  def commit_within_min=(mins)
72
- self.commitWithin = mins.blank? ? nil : mins.to_i.minutes.milliseconds
107
+ self.commit_within = mins.blank? ? nil : mins.to_i.minutes.milliseconds
73
108
  end
74
109
  def delete_after_days
75
- deleteAfter.blank? ? nil : deleteAfter / 1.second.milliseconds / 1.day.seconds
110
+ delete_after.blank? ? nil : deleteAfter / 1.second.milliseconds / 1.day.seconds
76
111
  end
77
112
  def delete_after_days=(days)
78
- self.deleteAfter = days.blank? ? nil : days.to_i.days.milliseconds
113
+ self.delete_after = days.blank? ? nil : days.to_i.days.milliseconds
79
114
  end
80
115
 
81
116
  def document_count
@@ -0,0 +1,18 @@
1
module LucidWorks

  # Read-only value object describing one property of a datasource type.
  # All values are copied from a string-keyed hash at construction time;
  # keys missing from the hash leave the corresponding reader nil.
  class DatasourceProperty

    attr_reader :description, :name, :allowed_values, :type, :default_value, :required, :read_only
    alias :read_only? :read_only
    # Predicate form of :required, for parity with read_only?
    alias :required? :required

    # @param attributes [Hash] string-keyed property description
    #
    # NOTE(review): 'defaultValues' is plural while the reader is default_value,
    # and 'read_only' is the only snake_case key next to camelCase
    # 'allowedValues'/'defaultValues' -- confirm both against the API payload.
    def initialize(attributes = {})
      @description    = attributes['description']
      @name           = attributes['name']
      @allowed_values = attributes['allowedValues']
      @type           = attributes['type']
      @default_value  = attributes['defaultValues']
      @required       = attributes['required']
      @read_only      = attributes['read_only']
    end
  end
end
@@ -0,0 +1,23 @@
1
module LucidWorks

  # Value object describing one datasource type: its category, its type name
  # and the raw property hashes supplied under the 'props' key.
  class DatasourceType

    attr_reader :category, :type, :props

    # @param attributes [Hash] string-keyed hash with 'category', 'type' and 'props'
    def initialize(attributes = {})
      @category = attributes['category']
      @type     = attributes['type']
      @props    = attributes['props']
    end

    # The raw props wrapped as DatasourceProperty objects, built once and cached.
    # A missing 'props' entry yields an empty list rather than raising on nil.
    #
    # @return [Array<DatasourceProperty>]
    def properties
      @properties ||= (@props || []).map do |prop|
        DatasourceProperty.new(prop)
      end
    end

    # @param name [String] property name to look up
    # @return [DatasourceProperty, nil] the first property with that name, if any
    def property(name)
      properties.detect { |p| p.name == name }
    end
  end
end
@@ -3,4 +3,5 @@ module LucidWorks
3
3
  end
4
4
 
5
5
  class ResourceNotFound < Exception ; end
6
+ class RecordInvalid < Exception ; end
6
7
  end
@@ -3,12 +3,19 @@ module LucidWorks
3
3
  class Field < Base
4
4
  belongs_to :collection
5
5
 
6
+ INDEXING_OPTIONS = [
7
+ 'document_only',
8
+ 'document_termfreq',
9
+ 'document_termfreq_termpos'
10
+ ]
11
+
6
12
  schema do
7
13
  attribute :name, :string, :primary_key => true, :omit_during_update => true
8
14
  attribute :editable, :boolean, :omit_during_update => true
15
+ attribute :dynamic_base, :string, :omit_during_update => true
9
16
  attributes :indexed, :stored, :facet, :include_in_results,
10
17
  :search_by_default, :highlight, :multi_valued,
11
- :term_vectors, :omit_tf,
18
+ :term_vectors, :omit_tf, :omit_positions,
12
19
  :use_for_deduplication, :synonym_expansion,
13
20
  :index_for_spellcheck, :index_for_autocomplete,
14
21
  :query_time_stopword_handling,
@@ -16,6 +23,8 @@ module LucidWorks
16
23
  :type => :boolean
17
24
  attribute :default_boost, :integer
18
25
  attributes :field_type, :short_field_boost, :term_vectors, :default_value, :copy_fields, :type => :string
26
+
27
+ attribute :index_term_freq_and_pos, :custom, :values => INDEXING_OPTIONS
19
28
  end
20
29
 
21
30
  TYPES = [
@@ -82,6 +91,10 @@ module LucidWorks
82
91
  validates_each :use_in_find_similar do |model, attr, value|
83
92
  model.errors.add(attr, 'a field must be indexed for it to be used for find-similar') if value == true && !model.indexed?
84
93
  end
94
+
95
+ def dynamically_generated?
96
+ !dynamic_base.blank?
97
+ end
85
98
 
86
99
  def t_field_type
87
100
  self.class.t_field_type(self.field_type)
@@ -94,7 +107,34 @@ module LucidWorks
94
107
  def initialize(options)
95
108
  super(options.reverse_merge(:omit_tf => false, :short_field_boost => 'high'))
96
109
  end
97
-
110
+
111
+ # Meta attribute that wraps the omit_tf and omit_positions combinations
112
+ def index_term_freq_and_pos
113
+ return :document_only if !indexed? || omit_tf?
114
+ return omit_positions? ? :document_termfreq : :document_termfreq_termpos
115
+ end
116
+
117
+ def index_term_freq_and_pos=(indexing_options)
118
+ if indexed?
119
+ case indexing_options.to_sym
120
+ when :document_only
121
+ self.omit_tf = true
122
+ self.omit_positions = true
123
+ when :document_termfreq
124
+ self.omit_tf = false
125
+ self.omit_positions = true
126
+ when :document_termfreq_termpos
127
+ self.omit_tf = false
128
+ self.omit_positions = false
129
+ else
130
+ raise "Unknown indexing option: '#{indexing_options}'. Allowed values are: #{INDEXING_OPTIONS.join(', ')}"
131
+ end
132
+ else # !indexed?
133
+ self.omit_tf = true
134
+ self.omit_positions = true
135
+ end
136
+ end
137
+
98
138
  def update_attributes(attrs)
99
139
  attrs = attrs.with_indifferent_access
100
140
  if [false, '0'].include?(attrs[:stored])
@@ -105,6 +145,7 @@ module LucidWorks
105
145
  attrs[:facet] ||= false
106
146
  attrs[:synonym_expansion] ||= false
107
147
  attrs[:omit_tf] ||= false
148
+ attrs[:omit_positions] ||= false
108
149
  attrs[:short_field_boost] ||= 'high'
109
150
  attrs[:search_by_default] ||= false
110
151
  attrs[:use_in_find_similar] ||= false
@@ -0,0 +1,28 @@
1
module LucidWorks

  # Model for a collection's field type. The schema allows dynamic attributes
  # in addition to the declared name, class and analyzers.
  class Fieldtype < Base
    belongs_to :collection

    ANALYZER_TYPES = %w{ default index query }

    schema do
      dynamic_attributes true
      attribute :name, :string, :primary_key => true, :omit_during_update => true
      attribute :class, :string
      attribute :analyzers
    end

    validates_presence_of :name, :_class

    # Provide support for adding more attributes during update: every key in
    # attrs that the class schema does not know yet is registered as a :string
    # attribute, then the update is delegated to the superclass.
    #
    # @param attrs [Hash] attribute names mapped to their new values
    def update_attributes(attrs)
      attrs.each_key do |attr|
        # One guard suffices; the original nested the same check twice.
        self.class.schema.add_attribute(attr, :string) unless self.class.schema.has_attribute?(attr)
      end

      super
    end
  end
end