renard-pipeline 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of renard-pipeline might be problematic. Click here for more details.

@@ -159,6 +159,7 @@ class GraphRulesCharacterUnifier(PipelineStep):
159
159
  min_appearances: int = 0,
160
160
  additional_hypocorisms: Optional[List[Tuple[str, List[str]]]] = None,
161
161
  link_corefs_mentions: bool = False,
162
+ ignore_lone_titles: Optional[Set[str]] = None,
162
163
  ) -> None:
163
164
  """
164
165
  :param min_appearances: minimum number of appearances of a
@@ -173,10 +174,14 @@ class GraphRulesCharacterUnifier(PipelineStep):
173
174
  extract a lot of spurious links. However, linking by
174
175
  coref is sometimes the only way to resolve a character
175
176
  alias.
177
+ :param ignore_lone_titles: a set of titles to ignore when
178
+ they stand on their own. This avoids extracting false
179
+ positive characters such as 'Mr.' or 'Miss'.
176
180
  """
177
181
  self.min_appearances = min_appearances
178
182
  self.additional_hypocorisms = additional_hypocorisms
179
183
  self.link_corefs_mentions = link_corefs_mentions
184
+ self.ignore_lone_titles = ignore_lone_titles or set()
180
185
 
181
186
  super().__init__()
182
187
 
@@ -197,11 +202,16 @@ class GraphRulesCharacterUnifier(PipelineStep):
197
202
  import networkx as nx
198
203
 
199
204
  mentions = [m for m in entities if m.tag == "PER"]
200
- mentions_str = [" ".join(m.tokens) for m in mentions]
205
+ mentions_str = set(
206
+ filter(
207
+ lambda m: not m in self.ignore_lone_titles,
208
+ map(lambda m: " ".join(m.tokens), mentions),
209
+ )
210
+ )
201
211
 
202
212
  # * create a graph where each node is a mention detected by NER
203
213
  G = nx.Graph()
204
- for mention_str in set(mentions_str):
214
+ for mention_str in mentions_str:
205
215
  G.add_node(mention_str)
206
216
 
207
217
  # * HumanName local configuration - dependant on language
renard/pipeline/core.py CHANGED
@@ -50,6 +50,13 @@ class Mention:
50
50
  self_dict["end_idx"] = self.end_idx + shift
51
51
  return self.__class__(**self_dict)
52
52
 
53
+ def __eq__(self, other: Mention) -> bool:
54
+ return (
55
+ self.tokens == other.tokens
56
+ and self.start_idx == other.start_idx
57
+ and self.end_idx == other.end_idx
58
+ )
59
+
53
60
  def __hash__(self) -> int:
54
61
  return hash(tuple(self.tokens) + (self.start_idx, self.end_idx))
55
62
 
@@ -1,7 +1,8 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: renard-pipeline
3
- Version: 0.4.0
3
+ Version: 0.4.2
4
4
  Summary: Relationships Extraction from NARrative Documents
5
+ Home-page: https://github.com/CompNet/Renard
5
6
  License: GPL-3.0-only
6
7
  Author: Arthur Amalvy
7
8
  Author-email: arthur.amalvy@univ-avignon.fr
@@ -31,6 +32,8 @@ Requires-Dist: tibert (>=0.3.0,<0.4.0)
31
32
  Requires-Dist: torch (>=2.0.0,!=2.0.1)
32
33
  Requires-Dist: tqdm (>=4.62.3,<5.0.0)
33
34
  Requires-Dist: transformers (>=4.36.0,<5.0.0)
35
+ Project-URL: Documentation, https://compnet.github.io/Renard/
36
+ Project-URL: Repository, https://github.com/CompNet/Renard
34
37
  Description-Content-Type: text/markdown
35
38
 
36
39
  # Renard
@@ -46,6 +49,8 @@ You can install the latest version using pip:
46
49
 
47
50
  > pip install renard-pipeline
48
51
 
52
+ Currently, Renard supports Python 3.8, 3.9 and 3.10.
53
+
49
54
 
50
55
  # Documentation
51
56
 
@@ -56,7 +61,32 @@ If you need local documentation, it can be generated using `Sphinx`. From the `d
56
61
 
57
62
  # Tutorial
58
63
 
59
- `renard_tutorial.py` is a tutorial in the `jupytext` format. You can open it as a notebook in Jupyter Notebook (or export it as a notebook with `jupytext --to ipynb renard-tutorial.py`).
64
+ Renard's central concept is the `Pipeline`. A `Pipeline` is a list of `PipelineStep` that are run sequentially in order to extract a character graph from a document. Here is a simple example:
65
+
66
+ ```python
67
+ from renard.pipeline import Pipeline
68
+ from renard.pipeline.tokenization import NLTKTokenizer
69
+ from renard.pipeline.ner import NLTKNamedEntityRecognizer
70
+ from renard.pipeline.character_unification import GraphRulesCharacterUnifier
71
+ from renard.pipeline.graph_extraction import CoOccurrencesGraphExtractor
72
+
73
+ with open("./my_doc.txt") as f:
74
+ text = f.read()
75
+
76
+ pipeline = Pipeline(
77
+ [
78
+ NLTKTokenizer(),
79
+ NLTKNamedEntityRecognizer(),
80
+ GraphRulesCharacterUnifier(min_appearances=10),
81
+ CoOccurrencesGraphExtractor(co_occurrences_dist=25)
82
+ ]
83
+ )
84
+
85
+ out = pipeline(text)
86
+ ```
87
+
88
+ For more information, see `renard_tutorial.py`, which is a tutorial in the `jupytext` format. You can open it as a notebook in Jupyter Notebook (or export it as a notebook with `jupytext --to ipynb renard-tutorial.py`).
89
+
60
90
 
61
91
 
62
92
  # Running tests
@@ -3,9 +3,9 @@ renard/graph_utils.py,sha256=5jwky9JgJ-WMVHfeaiXkAAQwEfhR2BFSrWhck1Qmpgo,5812
3
3
  renard/ner_utils.py,sha256=jN1AQkaV0Kx-Bc0oc3SYBEmSUuKPBbzXqByOlaqH62k,11263
4
4
  renard/nltk_utils.py,sha256=mUJiwMrEDZV4Fla7WuMR-hA_OC2ZIwSXgW_0Ew18VSo,977
5
5
  renard/pipeline/__init__.py,sha256=8Yim2mmny8YGvM7N5-na5zK-C9UDxUb77K9ml-VirUA,35
6
- renard/pipeline/character_unification.py,sha256=GcnC8UYqn1RBOGVhYS9LVcTNqpxm9YoT-lPsE3vodek,14818
6
+ renard/pipeline/character_unification.py,sha256=GJvPKw2zSMi0RpLLVlKsu7ewpxkrdxytND9PLxolbP4,15252
7
7
  renard/pipeline/characters_extraction.py,sha256=NzF8H9X19diW6rqwS5ERrRku7rFueO3S077H5C6kb7I,363
8
- renard/pipeline/core.py,sha256=Xw6AUQkqasBu2b1uCqAs4Pn_vLrZd1mDsP9TepuudFo,22360
8
+ renard/pipeline/core.py,sha256=luKNUTCDtZfwKzxVIaImyIMwFFvIknfT1LdQtongj24,22570
9
9
  renard/pipeline/corefs/__init__.py,sha256=9c9AaXBcRrDBf1jhTtJ7DyjOJhX_Zej3FjlcGak7MK8,44
10
10
  renard/pipeline/corefs/corefs.py,sha256=nzYT6S9ify3FlgGB3FSDpAhs2UQYgW9c3CL2GRYzTms,11508
11
11
  renard/pipeline/graph_extraction.py,sha256=n0T_nzNGiwE9bDubpPknHe7bbDhJ4ndnqmoMmyfbeWg,19468
@@ -29,7 +29,7 @@ renard/resources/pronouns/pronouns.py,sha256=YJ8hM6H8QHrF2Xx6O5blqc-Sqe1D1YFL0sR
29
29
  renard/resources/titles/__init__.py,sha256=Jcg4B7stsWiAaXbFgNl_L3ICtCQmFe9bo3YjdkVL50w,45
30
30
  renard/resources/titles/titles.py,sha256=GsFccVJuTkgDWiAqWZpFd2R9pGvFKQZBOk4RWWuWDkw,968
31
31
  renard/utils.py,sha256=8J3swFqSi4YqhgYNXvttJ0s-DmJbl_yEYri6JpGEWH8,2340
32
- renard_pipeline-0.4.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
33
- renard_pipeline-0.4.0.dist-info/METADATA,sha256=LcFmKE9KkONguzTVGueOvPBiaavH98D8xFZKjYFQkLI,2681
34
- renard_pipeline-0.4.0.dist-info/WHEEL,sha256=Zb28QaM1gQi8f4VCBhsUklF61CTlNYfs9YAZn-TOGFk,88
35
- renard_pipeline-0.4.0.dist-info/RECORD,,
32
+ renard_pipeline-0.4.2.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
33
+ renard_pipeline-0.4.2.dist-info/METADATA,sha256=R1ZbG6Mdk1B5Zk73QSKB-lZu7rDnvWKe3M5JiDqPFxM,3697
34
+ renard_pipeline-0.4.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
35
+ renard_pipeline-0.4.2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.6.1
2
+ Generator: poetry-core 1.9.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any