corp-extractor 0.2.11__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,288 @@
+ """
+ GLiNER2-based triple extraction.
+
+ Uses GLiNER2 for relation extraction and entity recognition to extract
+ subject, predicate, and object from source text. The T5-Gemma model provides
+ triple structure and coreference resolution, while GLiNER2 handles
+ linguistic analysis.
+
+ The GLiNER2 model is loaded automatically on first use.
+ """
+
+ import logging
+ from typing import Optional
+
+ logger = logging.getLogger(__name__)
+
+ # Lazy-loaded GLiNER2 model
+ _model = None
+
+
+ def _get_model():
+     """
+     Lazy-load the GLiNER2 model.
+
+     Uses the base model (205M parameters) which is CPU-optimized.
+     """
+     global _model
+     if _model is None:
+         from gliner2 import GLiNER2
+
+         logger.info("Loading GLiNER2 model 'fastino/gliner2-base-v1'...")
+         _model = GLiNER2.from_pretrained("fastino/gliner2-base-v1")
+         logger.debug("GLiNER2 model loaded")
+     return _model
+
+
+ def extract_triple_from_text(
+     source_text: str,
+     model_subject: str,
+     model_object: str,
+     model_predicate: str,
+     predicates: Optional[list[str]] = None,
+ ) -> tuple[str, str, str] | None:
+     """
+     Extract subject, predicate, object from source text using GLiNER2.
+
+     Returns a GLiNER2-based triple that can be added to the candidate pool
+     alongside the model's triple. The existing scoring/dedup logic will
+     pick the best one.
+
+     Args:
+         source_text: The source sentence to analyze
+         model_subject: Subject from T5-Gemma (used for matching and fallback)
+         model_object: Object from T5-Gemma (used for matching and fallback)
+         model_predicate: Predicate from T5-Gemma (used when no predicates provided)
+         predicates: Optional list of predefined relation types to extract
+
+     Returns:
+         Tuple of (subject, predicate, object) from GLiNER2, or None if extraction fails
+     """
+     if not source_text:
+         return None
+
+     try:
+         model = _get_model()
+
+         if predicates:
+             # Use relation extraction with predefined predicates
+             result = model.extract_relations(source_text, predicates)
+
+             # Find best matching relation
+             relation_data = result.get("relation_extraction", {})
+             best_match = None
+             best_confidence = 0.0
+
+             for rel_type, relations in relation_data.items():
+                 for rel in relations:
+                     # Handle both tuple format and dict format
+                     if isinstance(rel, tuple):
+                         head, tail = rel
+                         confidence = 1.0
+                     else:
+                         head = rel.get("head", {}).get("text", "")
+                         tail = rel.get("tail", {}).get("text", "")
+                         confidence = min(
+                             rel.get("head", {}).get("confidence", 0.5),
+                             rel.get("tail", {}).get("confidence", 0.5)
+                         )
+
+                     # Score based on match with model hints
+                     score = confidence
+                     if model_subject.lower() in head.lower() or head.lower() in model_subject.lower():
+                         score += 0.2
+                     if model_object.lower() in tail.lower() or tail.lower() in model_object.lower():
+                         score += 0.2
+
+                     if score > best_confidence:
+                         best_confidence = score
+                         best_match = (head, rel_type, tail)
+
+             if best_match:
+                 logger.debug(
+                     f"GLiNER2 extracted (relation): subj='{best_match[0]}', pred='{best_match[1]}', obj='{best_match[2]}'"
+                 )
+                 return best_match
+
+         else:
+             # No predicate list provided - use GLiNER2 for entity extraction
+             # and extract predicate from source text using the model's hint
+
+             # Extract entities to refine subject/object boundaries
+             entity_types = [
+                 "person", "organization", "company", "location", "city", "country",
+                 "product", "event", "date", "money", "quantity"
+             ]
+             result = model.extract_entities(source_text, entity_types)
+             entities = result.get("entities", {})
+
+             # Find entities that match model subject/object
+             refined_subject = model_subject
+             refined_object = model_object
+
+             for entity_type, entity_list in entities.items():
+                 for entity in entity_list:
+                     entity_lower = entity.lower()
+                     # Check if this entity matches or contains the model's subject/object
+                     if model_subject.lower() in entity_lower or entity_lower in model_subject.lower():
+                         # Use the entity text if it's more complete
+                         if len(entity) >= len(refined_subject):
+                             refined_subject = entity
+                     if model_object.lower() in entity_lower or entity_lower in model_object.lower():
+                         if len(entity) >= len(refined_object):
+                             refined_object = entity
+
+             # Extract predicate from source text using predicate split
+             predicate_result = extract_triple_by_predicate_split(source_text, model_predicate)
+             if predicate_result:
+                 _, extracted_predicate, _ = predicate_result
+             else:
+                 extracted_predicate = model_predicate
+
+             if extracted_predicate:
+                 logger.debug(
+                     f"GLiNER2 extracted (entity-refined): subj='{refined_subject}', pred='{extracted_predicate}', obj='{refined_object}'"
+                 )
+                 return (refined_subject, extracted_predicate, refined_object)
+
+         return None
+
+     except ImportError as e:
+         logger.warning(f"GLiNER2 not installed: {e}")
+         return None
+     except Exception as e:
+         logger.debug(f"GLiNER2 extraction failed: {e}")
+         return None
+
+
+ def extract_triple_by_predicate_split(
+     source_text: str,
+     predicate: str,
+ ) -> tuple[str, str, str] | None:
+     """
+     Extract subject and object by splitting the source text around the predicate.
+
+     This is useful when the predicate is known but subject/object boundaries
+     are uncertain. Uses the predicate as an anchor point.
+
+     Args:
+         source_text: The source sentence
+         predicate: The predicate (verb phrase) to split on
+
+     Returns:
+         Tuple of (subject, predicate, object) or None if split fails
+     """
+     if not source_text or not predicate:
+         return None
+
+     # Find the predicate in the source text (case-insensitive)
+     source_lower = source_text.lower()
+     pred_lower = predicate.lower()
+
+     pred_pos = source_lower.find(pred_lower)
+     if pred_pos < 0:
+         # Try finding just the main verb (first word of predicate)
+         main_verb = pred_lower.split()[0] if pred_lower.split() else ""
+         if main_verb and len(main_verb) > 2:
+             pred_pos = source_lower.find(main_verb)
+             if pred_pos >= 0:
+                 # Adjust to use the actual predicate length for splitting
+                 predicate = main_verb
+
+     if pred_pos < 0:
+         return None
+
+     # Extract subject (text before predicate, trimmed)
+     subject = source_text[:pred_pos].strip()
+
+     # Extract object (text after predicate, trimmed)
+     pred_end = pred_pos + len(predicate)
+     obj = source_text[pred_end:].strip()
+
+     # Clean up: remove trailing punctuation from object
+     obj = obj.rstrip('.,;:!?')
+
+     # Clean up: remove leading articles/prepositions from object if very short
+     obj_words = obj.split()
+     if obj_words and obj_words[0].lower() in ('a', 'an', 'the', 'to', 'of', 'for'):
+         if len(obj_words) > 1:
+             obj = ' '.join(obj_words[1:])
+
+     # Validate: both subject and object should have meaningful content
+     if len(subject) < 2 or len(obj) < 2:
+         return None
+
+     logger.debug(
+         f"Predicate-split extracted: subj='{subject}', pred='{predicate}', obj='{obj}'"
+     )
+
+     return (subject, predicate, obj)
+
+
+ def score_entity_content(text: str) -> float:
+     """
+     Score how entity-like a text is using GLiNER2 entity recognition.
+
+     Returns:
+         1.0 - Recognized as a named entity with high confidence
+         0.8 - Recognized as an entity with moderate confidence
+         0.6 - Partially recognized or contains entity-like content
+         0.4 - Weakly recognized (low-confidence partial match)
+         0.2 - Not recognized as any entity type
+     """
+     if not text or not text.strip():
+         return 0.2
+
+     try:
+         model = _get_model()
+
+         # Check if text is recognized as common entity types
+         entity_types = [
+             "person", "organization", "company", "location", "city", "country",
+             "product", "event", "date", "money", "quantity"
+         ]
+
+         result = model.extract_entities(
+             text,
+             entity_types,
+             include_confidence=True
+         )
+
+         # Result format: {'entities': {'person': [{'text': '...', 'confidence': 0.99}], ...}}
+         entities_dict = result.get("entities", {})
+
+         # Find best matching entity across all types
+         best_confidence = 0.0
+         text_lower = text.lower().strip()
+
+         for entity_type, entity_list in entities_dict.items():
+             for entity in entity_list:
+                 if isinstance(entity, dict):
+                     entity_text = entity.get("text", "").lower().strip()
+                     confidence = entity.get("confidence", 0.5)
+                 else:
+                     # Fallback for string format
+                     entity_text = str(entity).lower().strip()
+                     confidence = 0.8
+
+                 # Check if entity covers most of the input text
+                 if entity_text == text_lower:
+                     # Exact match
+                     best_confidence = max(best_confidence, confidence)
+                 elif entity_text in text_lower or text_lower in entity_text:
+                     # Partial match - reduce confidence
+                     best_confidence = max(best_confidence, confidence * 0.8)
+
+         if best_confidence >= 0.9:
+             return 1.0
+         elif best_confidence >= 0.7:
+             return 0.8
+         elif best_confidence >= 0.5:
+             return 0.6
+         elif best_confidence > 0:
+             return 0.4
+         else:
+             return 0.2
+
+     except Exception as e:
+         logger.debug(f"Entity scoring failed for '{text}': {e}")
+         return 0.5  # Neutral score on error
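
For orientation, a minimal sketch of how these new helpers might be exercised on their own. It assumes the new module is packaged as statement_extractor/gliner_extraction.py (consistent with the `from .gliner_extraction import ...` usage later in this diff); the sentence and printed results are illustrative, not taken from the package's tests.

    # Hypothetical standalone usage; extract_triple_from_text needs the optional
    # gliner2 dependency installed, extract_triple_by_predicate_split does not.
    from statement_extractor.gliner_extraction import (
        extract_triple_by_predicate_split,
        extract_triple_from_text,
    )

    sentence = "Acme Corp acquired Widget Inc in 2021."

    # Pure string-splitting fallback: no model download needed.
    print(extract_triple_by_predicate_split(sentence, "acquired"))
    # -> ('Acme Corp', 'acquired', 'Widget Inc in 2021')

    # Full GLiNER2 path, constrained to a predefined predicate inventory.
    triple = extract_triple_from_text(
        sentence,
        model_subject="Acme Corp",
        model_object="Widget Inc",
        model_predicate="acquired",
        predicates=["acquired", "founded", "works_for"],
    )
    print(triple)  # e.g. ('Acme Corp', 'acquired', 'Widget Inc'), or None if extraction fails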
@@ -24,6 +24,14 @@ class EntityType(str, Enum):
      UNKNOWN = "UNKNOWN"


+ class ExtractionMethod(str, Enum):
+     """Method used to extract the triple components."""
+     HYBRID = "hybrid"    # Model subject/object + GLiNER2 predicate
+     GLINER = "gliner"    # All components from GLiNER2 extraction
+     SPLIT = "split"      # Subject/object from splitting source text around predicate
+     MODEL = "model"      # All components from T5-Gemma model (when GLiNER2 disabled)
+
+
  class Entity(BaseModel):
      """An entity (subject or object) with its text and type."""
      text: str = Field(..., description="The entity text")
@@ -52,12 +60,18 @@ class Statement(BaseModel):
      object: Entity = Field(..., description="The object entity")
      source_text: Optional[str] = Field(None, description="The original text this statement was extracted from")

+     # Extraction method tracking
+     extraction_method: ExtractionMethod = Field(
+         default=ExtractionMethod.MODEL,
+         description="Method used to extract this triple (hybrid, gliner, split, or model)"
+     )
+
      # Quality scoring fields
      confidence_score: Optional[float] = Field(
          None,
          ge=0.0,
          le=1.0,
-         description="Groundedness score (0-1) indicating how well the triple is supported by source text"
+         description="Semantic similarity score (0-1) between source text and reassembled triple"
      )
      evidence_span: Optional[tuple[int, int]] = Field(
          None,
@@ -99,6 +113,7 @@ class Statement(BaseModel):
              object=merged_object,
              predicate=self.predicate,
              source_text=self.source_text,
+             extraction_method=self.extraction_method,
              confidence_score=self.confidence_score,
              evidence_span=self.evidence_span,
              canonical_predicate=self.canonical_predicate,
@@ -116,6 +131,7 @@ class Statement(BaseModel):
              object=self.subject,
              predicate=self.predicate,
              source_text=self.source_text,
+             extraction_method=self.extraction_method,
              confidence_score=self.confidence_score,
              evidence_span=self.evidence_span,
              canonical_predicate=self.canonical_predicate,
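
Because ExtractionMethod subclasses both str and Enum, the tracked method serializes as a plain string; a small illustration (the import path statement_extractor.models is assumed, not shown in this diff):

    from statement_extractor.models import ExtractionMethod

    method = ExtractionMethod.GLINER
    print(method.value)        # "gliner"
    print(method == "gliner")  # True - str-based enums compare equal to their value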
@@ -279,6 +295,16 @@ class ExtractionOptions(BaseModel):
          default=True,
          description="Use embedding similarity for predicate deduplication"
      )
+     use_gliner_extraction: bool = Field(
+         default=True,
+         description="Use GLiNER2 for predicate/subject/object extraction (model provides structure + coreference)"
+     )
+
+     # GLiNER2 predicate configuration
+     predicates: Optional[list[str]] = Field(
+         default=None,
+         description="Optional list of predefined predicate types for GLiNER2 relation extraction (e.g., ['works_for', 'founded'])"
+     )

      # Verbose logging
      verbose: bool = Field(
@@ -286,5 +312,11 @@ class ExtractionOptions(BaseModel):
          description="Enable verbose logging for debugging"
      )

+     # Triple selection
+     all_triples: bool = Field(
+         default=False,
+         description="Keep all candidate triples instead of selecting the highest-scoring one per source"
+     )
+
      class Config:
          arbitrary_types_allowed = True  # Allow Callable type
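
Putting the new ExtractionOptions fields together, a configuration sketch (assuming these classes live in statement_extractor.models and that the remaining fields have usable defaults):

    from statement_extractor.models import ExtractionOptions

    options = ExtractionOptions(
        use_gliner_extraction=True,                       # let GLiNER2 refine subject/predicate/object
        predicates=["works_for", "founded", "acquired"],  # constrain GLiNER2 relation extraction
        all_triples=False,                                # keep only the best-scoring triple per source
    )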
@@ -2,13 +2,15 @@
  Scoring module for statement extraction quality assessment.

  Provides:
- - TripleScorer: Score individual triples for groundedness
+ - TripleScorer: Score individual triples combining semantic similarity and entity recognition
  - BeamScorer: Score and select/merge beams based on quality metrics
  """

  import logging
  from typing import Optional

+ import numpy as np
+
  from .models import ScoringConfig, Statement

  logger = logging.getLogger(__name__)
@@ -16,62 +18,126 @@ logger = logging.getLogger(__name__)

  class TripleScorer:
      """
-     Score individual triples for groundedness in source text.
-
-     Groundedness is measured by checking:
-     - Subject text appears in source
-     - Object text appears in source
-     - Subject and object are in proximity (same/nearby sentences)
-     - Evidence span exists and is valid
+     Score individual triples combining semantic similarity and entity recognition.
+
+     The score is a weighted combination of:
+     - Semantic similarity (50%): Cosine similarity between source text and reassembled triple
+     - Subject entity score (25%): How entity-like the subject is (via GLiNER2)
+     - Object entity score (25%): How entity-like the object is (via GLiNER2)
+
+     Entity scoring (via GLiNER2):
+     - Recognized entity with high confidence: 1.0
+     - Recognized entity with moderate confidence: 0.8
+     - Partially recognized: 0.6
+     - Not recognized: 0.2
      """

-     def __init__(self, config: Optional[ScoringConfig] = None):
+     def __init__(
+         self,
+         config: Optional[ScoringConfig] = None,
+         device: Optional[str] = None,
+     ):
          self.config = config or ScoringConfig()

+         # Auto-detect device
+         if device is None:
+             import torch
+             if torch.cuda.is_available():
+                 self.device = "cuda"
+             elif torch.backends.mps.is_available():
+                 self.device = "mps"
+             else:
+                 self.device = "cpu"
+         else:
+             self.device = device
+
+         # Lazy-loaded embedding model
+         self._model = None
+         self._embedding_model_name = "all-MiniLM-L6-v2"
+
+     def _load_model(self):
+         """Load sentence-transformers model lazily."""
+         if self._model is not None:
+             return
+
+         from sentence_transformers import SentenceTransformer
+
+         logger.debug(f"Loading embedding model: {self._embedding_model_name} on {self.device}")
+         self._model = SentenceTransformer(self._embedding_model_name, device=self.device)
+         logger.debug(f"Embedding model loaded on {self.device}")
+
+     def _compute_embeddings(self, texts: list[str]) -> np.ndarray:
+         """Compute embeddings for a list of texts."""
+         self._load_model()
+         return self._model.encode(texts, convert_to_numpy=True)
+
+     def _cosine_similarity(self, vec1: np.ndarray, vec2: np.ndarray) -> float:
+         """Compute cosine similarity between two vectors."""
+         dot = np.dot(vec1, vec2)
+         norm1 = np.linalg.norm(vec1)
+         norm2 = np.linalg.norm(vec2)
+         if norm1 == 0 or norm2 == 0:
+             return 0.0
+         return float(dot / (norm1 * norm2))
+
+     def _score_noun_content(self, text: str) -> float:
+         """
+         Score how entity-like a text is using GLiNER2 entity recognition.
+
+         Returns:
+             1.0 - Recognized as a named entity with high confidence
+             0.8 - Recognized as an entity with moderate confidence
+             0.6 - Partially recognized or contains entity-like content
+             0.2 - Not recognized as any entity type
+         """
+         if not text or not text.strip():
+             return 0.2
+
+         try:
+             from .gliner_extraction import score_entity_content
+             return score_entity_content(text)
+         except Exception as e:
+             logger.debug(f"Entity scoring failed for '{text}': {e}")
+             return 0.5  # Neutral score on error
+
      def score_triple(self, statement: Statement, source_text: str) -> float:
          """
-         Score a triple's groundedness (0-1).
+         Score a triple's quality (0-1) combining semantic similarity and entity recognition.
+
+         The score is a weighted combination of:
+         - Semantic similarity (50%): How well the triple captures the source meaning
+         - Subject entity score (25%): How entity-like the subject is
+         - Object entity score (25%): How entity-like the object is

-         Higher scores indicate better grounding in source text.
+         Higher scores indicate better overall quality.
          """
-         if not source_text:
+         # Use statement's source_text if available, otherwise use provided source_text
+         reference_text = statement.source_text or source_text
+         if not reference_text:
              logger.debug(f"  No source text, returning neutral score 0.5")
              return 0.5  # Neutral score if no source text

-         score = 0.0
-         weights_sum = 0.0
-
-         # Check subject appears in source (weight: 0.3)
-         subject_found = self._text_appears_in(statement.subject.text, source_text)
-         score += 0.3 * (1.0 if subject_found else 0.0)
-         weights_sum += 0.3
-
-         # Check object appears in source (weight: 0.3)
-         object_found = self._text_appears_in(statement.object.text, source_text)
-         score += 0.3 * (1.0 if object_found else 0.0)
-         weights_sum += 0.3
-
-         # Check predicate has lexical trigger (weight: 0.2)
-         predicate_grounded = self._predicate_has_trigger(statement.predicate, source_text)
-         score += 0.2 * (1.0 if predicate_grounded else 0.0)
-         weights_sum += 0.2
-
-         # Check proximity - subject and object in same/nearby region (weight: 0.2)
-         proximity_score = 0.0
-         if subject_found and object_found:
-             proximity_score = self._compute_proximity(
-                 statement.subject.text,
-                 statement.object.text,
-                 source_text
-             )
-         score += 0.2 * proximity_score
-         weights_sum += 0.2
+         # Reassemble the triple
+         reassembled = f"{statement.subject.text} {statement.predicate} {statement.object.text}"
+
+         # Compute semantic similarity
+         embeddings = self._compute_embeddings([reference_text, reassembled])
+         semantic_similarity = self._cosine_similarity(embeddings[0], embeddings[1])
+
+         # Compute entity scores for subject and object
+         subject_noun_score = self._score_noun_content(statement.subject.text)
+         object_noun_score = self._score_noun_content(statement.object.text)

-         final_score = score / weights_sum if weights_sum > 0 else 0.0
+         # Weighted combination: 50% semantic, 25% subject, 25% object
+         final_score = (
+             semantic_similarity * 0.5 +
+             subject_noun_score * 0.25 +
+             object_noun_score * 0.25
+         )

          logger.debug(
              f"  Score for '{statement.subject.text}' --[{statement.predicate}]--> '{statement.object.text}': "
-             f"{final_score:.2f} (subj={subject_found}, obj={object_found}, pred={predicate_grounded}, prox={proximity_score:.2f})"
+             f"{final_score:.3f} (semantic={semantic_similarity:.2f}, subj_noun={subject_noun_score:.2f}, obj_noun={object_noun_score:.2f})"
          )

          return final_score
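
A worked example of the new 50/25/25 weighting in score_triple (the component values below are made up for illustration):

    semantic_similarity = 0.84   # cosine similarity between source text and "subject predicate object"
    subject_entity_score = 1.0   # subject recognized as a named entity with high confidence
    object_entity_score = 0.6    # object only partially recognized

    final_score = 0.5 * semantic_similarity + 0.25 * subject_entity_score + 0.25 * object_entity_score
    print(round(final_score, 3))  # 0.82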
@@ -115,54 +181,6 @@ class TripleScorer:

          return None

-     def _text_appears_in(self, text: str, source: str) -> bool:
-         """Check if text appears in source (case-insensitive)."""
-         return text.lower() in source.lower()
-
-     def _predicate_has_trigger(self, predicate: str, source: str) -> bool:
-         """Check if predicate has a lexical trigger in source."""
-         # Extract main verb/word from predicate
-         words = predicate.lower().split()
-         source_lower = source.lower()
-
-         # Check if any predicate word appears in source
-         for word in words:
-             if len(word) > 2 and word in source_lower:
-                 return True
-         return False
-
-     def _compute_proximity(
-         self,
-         subject_text: str,
-         object_text: str,
-         source: str
-     ) -> float:
-         """
-         Compute proximity score (0-1) based on distance between subject and object.
-
-         Returns 1.0 if same sentence, decreasing with distance.
-         """
-         source_lower = source.lower()
-         subj_pos = source_lower.find(subject_text.lower())
-         obj_pos = source_lower.find(object_text.lower())
-
-         if subj_pos < 0 or obj_pos < 0:
-             return 0.0
-
-         # Check if in same sentence
-         start = min(subj_pos, obj_pos)
-         end = max(subj_pos, obj_pos)
-         region = source[start:end]
-
-         # If no sentence boundary between them, high proximity
-         if '.' not in region and '!' not in region and '?' not in region:
-             return 1.0
-
-         # Otherwise, score decreases with distance
-         # Assume ~100 chars per sentence on average
-         sentence_distance = region.count('.') + region.count('!') + region.count('?')
-         return max(0.0, 1.0 - (sentence_distance * 0.2))
-
      def _extend_to_sentence(
          self,
          source: str,
@@ -1,11 +0,0 @@
- statement_extractor/__init__.py,sha256=MIZgn-lD9-XGJapzdyYxMhEJFRrTzftbRklrhwA4e8w,2967
- statement_extractor/canonicalization.py,sha256=ZMLs6RLWJa_rOJ8XZ7PoHFU13-zeJkOMDnvK-ZaFa5s,5991
- statement_extractor/cli.py,sha256=NIGCpqcnzF42B16RCiSu4kN0RlnVne2ZAT8341Znt1g,8558
- statement_extractor/extractor.py,sha256=r2gcCfZT43Q8STPuzaXmhbjWXTAs4JwMeAtCjQxlsIQ,25870
- statement_extractor/models.py,sha256=IE3TyIiOl2CINPMroQnGT12rSeQFR0bV3y4BJ79wLmI,10877
- statement_extractor/predicate_comparer.py,sha256=jcuaBi5BYqD3TKoyj3pR9dxtX5ihfDJvjdhEd2LHCwc,26184
- statement_extractor/scoring.py,sha256=xs0SxrV42QNBULQguU1-HhcCc-HnS-ekbcdx7FqWGVk,15663
- corp_extractor-0.2.11.dist-info/METADATA,sha256=D-fs9i9kn4v5bRAHCHxI3cq_6vosNgDCN7uuYwVZztM,13775
- corp_extractor-0.2.11.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- corp_extractor-0.2.11.dist-info/entry_points.txt,sha256=i0iKFqPIusvb-QTQ1zNnFgAqatgVah-jIhahbs5TToQ,115
- corp_extractor-0.2.11.dist-info/RECORD,,