rbbt-text 1.3.0 → 1.3.1

Sign up for free protection for your applications and access to all features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c2a24d8e7faf30d53e41a00a27f6145e8e9f18f0c10af57cdddaea0ee18c35d6
4
- data.tar.gz: 3475006965110391e35151cd1b5368028dacf467aa276f8eb68fce3320be1122
3
+ metadata.gz: a6965ecde1b38d5bc93d4836ee6d757e2add39a51d64c2f06142bbbd303e22d7
4
+ data.tar.gz: a5c32ea03ea8214dd8c94ef6e884b59e459e3a7a8e3d26065a0a046b5b9b4778
5
5
  SHA512:
6
- metadata.gz: da40a039a4792eb5e7fa00270870279221c74dcbf51df1b5278b247496fefbfa888a87b7ab19f05676644c51a01177eb49e229cb0156fe7f0190dd4933d41e24
7
- data.tar.gz: a32fca5f21a987dcbb6b5541015cc33879330e6f1ef7c4a28e75debe5bdd1dc8bf7b98bfc91d828e605f29868aa972b55cd59bb4f86e66d2fb0cfea31fac2ae0
6
+ metadata.gz: 756d240a796e5ac88b4b55368e0e4e3af14b3dd2d8b8b55e49839c3cdc3fa45ee807d648cf86b45b62e7f2f4d9e7fc15567ab21d3356e37a5c3c4316cbcaa841
7
+ data.tar.gz: 6caa03ec51185cac00cc436bac999b063fccfcc1dbf0e2c09359dad7171c0eea37f80436cc860038a2c1ad17eb9b67a03e88d1ae8ef406ce1c5c874d375d1abd
@@ -69,6 +69,11 @@ module BagOfWords
69
69
  count = bigrams ? count(bigrams(text)) : count(words(text))
70
70
  count.values_at(*terms)
71
71
  end
72
+
73
+ def self.weighted_features(text, weights)
74
+ features = features(text, weights.keys)
75
+ features.zip(weights.values).collect{|f,w| f * w }
76
+ end
72
77
  end
73
78
 
74
79
  class String
@@ -74,28 +74,32 @@ class Dictionary::TF_IDF
74
74
  end
75
75
 
76
76
  def best(options = {})
77
- high, low, limit = {
78
- :low => 0,
79
- :high => 1,
80
- }.merge(options).
81
- values_at(:high, :low, :limit)
82
-
83
- num_docs = @num_docs.to_f
84
- best = df.select{|term, value|
85
- value >= low && value <= high
86
- }.collect{|p|
87
- term = p.first
88
- df_value = p.last
89
- [term,
90
- @terms[term].to_f / num_docs * Math::log(1.0/df_value)
91
- ]
92
- }
93
-
94
- if limit
95
- Hash[*best.sort{|a,b| b[1] <=> a[1]}.slice(0, limit).flatten]
96
- else
97
- Hash[*best.flatten]
98
- end
77
+ key = Misc.obj2digest(options)
78
+ @best ||= {}
79
+ @best[key] ||= begin
80
+ high, low, limit = {
81
+ :low => 0,
82
+ :high => 1,
83
+ }.merge(options).
84
+ values_at(:high, :low, :limit)
85
+
86
+ num_docs = @num_docs.to_f
87
+ best = df.select{|term, value|
88
+ value >= low && value <= high
89
+ }.collect{|p|
90
+ term = p.first
91
+ df_value = p.last
92
+ [term,
93
+ @terms[term].to_f / num_docs * Math::log(1.0/df_value)
94
+ ]
95
+ }
96
+
97
+ if limit
98
+ Hash[*best.sort{|a,b| b[1] <=> a[1]}.slice(0, limit).flatten]
99
+ else
100
+ Hash[*best.flatten]
101
+ end
102
+ end
99
103
  end
100
104
 
101
105
  def weights(options = {})
@@ -68,6 +68,10 @@ module Transformed
68
68
 
69
69
  segments = [segments] unless Array === segments
70
70
  orig_length = self.length
71
+
72
+ offset = self.respond_to?(:offset) ? self.offset : 0
73
+ segments = segments.select{|s| s.offset >= offset && s.offset <= offset + self.length - 1 }
74
+
71
75
  Segment.clean_sort(segments).each do |segment|
72
76
  next if segment.offset.nil?
73
77
 
@@ -86,7 +90,7 @@ module Transformed
86
90
 
87
91
  updated_text = self[updated_begin..updated_end]
88
92
  if updated_text.nil?
89
- Log.warn "Range outside of segment: #{self.length} #{segment.locus} (#{updated_range})"
93
+ Log.warn "Range outside of segment: #{self.length} #{segment.range} (#{updated_range})"
90
94
  next
91
95
  end
92
96
 
@@ -101,6 +101,35 @@ More recently, PPAR activators were shown to inhibit the activation of inflammat
101
101
  assert_equal "CDK5R1 protein", exp2
102
102
  end
103
103
 
104
+ def test_with_transform_sentences
105
+ a = "This first sentence mentions Bread. This sentence mentions the TP53 gene and the CDK5R1 protein"
106
+ original = a.dup
107
+
108
+ gene1 = "TP53"
109
+ gene1.extend NamedEntity
110
+ gene1.offset = a.index gene1
111
+
112
+ gene2 = "CDK5R1"
113
+ gene2.extend NamedEntity
114
+ gene2.offset = a.index gene2
115
+
116
+ bread = "Bread"
117
+ bread.extend NamedEntity
118
+ bread.offset = a.index bread
119
+
120
+ sentences = Segment.align(a, a.split(". "))
121
+
122
+ Transformed.with_transform(sentences[1], [gene1, gene2, bread], "GN") do
123
+ assert sentences[1].include?("GN gene and the GN protein")
124
+ end
125
+
126
+ Transformed.with_transform(sentences[0], [gene1, gene2, bread], "BR") do
127
+ assert sentences[0].include?("first sentence mentions BR")
128
+ end
129
+
130
+
131
+ end
132
+
104
133
  def test_html
105
134
  a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
106
135
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-04-18 00:00:00.000000000 Z
11
+ date: 2020-04-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util