commonmeta-ruby 3.9.0 → 3.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +12 -11
- data/lib/commonmeta/author_utils.rb +12 -5
- data/lib/commonmeta/readers/commonmeta_reader.rb +1 -1
- data/lib/commonmeta/readers/datacite_reader.rb +120 -108
- data/lib/commonmeta/schema_utils.rb +1 -1
- data/lib/commonmeta/utils.rb +47 -2
- data/lib/commonmeta/version.rb +1 -1
- data/lib/commonmeta/writers/commonmeta_writer.rb +1 -1
- data/resources/{commonmeta_v0.10.5.json → commonmeta_v0.10.7.json} +21 -5
- data/resources/{datacite-v4.json → datacite-v45.json} +26 -5
- data/resources/kernel-4/include/datacite-relationType-v4.xsd +2 -0
- data/resources/kernel-4/include/datacite-resourceType-v4.xsd +2 -0
- data/resources/kernel-4/metadata.xsd +11 -7
- data/spec/author_utils_spec.rb +10 -0
- data/spec/fixtures/commonmeta.json +1 -1
- data/spec/fixtures/datacite-dataset_v4.5.json +736 -0
- data/spec/fixtures/datacite-instrument.json +135 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_datacite_metadata/SoftwareSourceCode.yml +8 -8
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_datacite_metadata/dissertation.yml +12 -12
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_datacite_metadata/funding_references.yml +12 -12
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_datacite_metadata/subject_scheme.yml +22 -22
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/medium_post_with_institutional_author.yml +317 -0
- data/spec/readers/commonmeta_reader_spec.rb +1 -1
- data/spec/readers/datacite_reader_spec.rb +68 -14
- data/spec/readers/json_feed_reader_spec.rb +25 -0
- data/spec/utils_spec.rb +30 -4
- data/spec/writers/commonmeta_writer_spec.rb +30 -3
- data/spec/writers/csl_writer_spec.rb +1 -0
- data/spec/writers/csv_writer_spec.rb +1 -0
- data/spec/writers/datacite_writer_spec.rb +0 -1
- metadata +7 -4
@@ -0,0 +1,317 @@
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: get
|
5
|
+
uri: https://api.rogue-scholar.org/posts/05f01f68-ef81-47d7-a3c1-40aba91d358f
|
6
|
+
body:
|
7
|
+
encoding: ASCII-8BIT
|
8
|
+
string: ''
|
9
|
+
headers:
|
10
|
+
Connection:
|
11
|
+
- close
|
12
|
+
Host:
|
13
|
+
- api.rogue-scholar.org
|
14
|
+
User-Agent:
|
15
|
+
- http.rb/5.1.1
|
16
|
+
response:
|
17
|
+
status:
|
18
|
+
code: 200
|
19
|
+
message: OK
|
20
|
+
headers:
|
21
|
+
Content-Type:
|
22
|
+
- application/json
|
23
|
+
Content-Length:
|
24
|
+
- '23886'
|
25
|
+
Ratelimit-Limit:
|
26
|
+
- '15'
|
27
|
+
Ratelimit-Remaining:
|
28
|
+
- '14'
|
29
|
+
Ratelimit-Reset:
|
30
|
+
- '3'
|
31
|
+
Date:
|
32
|
+
- Wed, 31 Jan 2024 19:50:01 GMT
|
33
|
+
Server:
|
34
|
+
- Fly/ba9e227a (2024-01-26)
|
35
|
+
Via:
|
36
|
+
- 1.1 fly.io
|
37
|
+
Fly-Request-Id:
|
38
|
+
- 01HNGH4EZV3XQF20H1PZ6X5N07-fra
|
39
|
+
body:
|
40
|
+
encoding: UTF-8
|
41
|
+
string: '{"abstract":null,"archive_url":null,"authors":[{"name":"Research Graph"}],"blog":{"api":false,"archive_prefix":null,"authors":null,"backlog":0,"canonical_url":null,"category":"computerAndInformationSciences","created_at":1706685423,"current_feed_url":null,"description":"Stories
|
42
|
+
by Research Graph on Medium","favicon":"https://cdn-images-1.medium.com/fit/c/150/150/1*laJi0jBkVoGhXid7gD_DmQ.png","feed_format":"application/rss+xml","feed_url":"https://medium.com/@researchgraph/feed","filter":null,"funding":null,"generator":"Medium","generator_raw":"Medium","home_page_url":"https://medium.com/@researchgraph","id":"30da2ca9-8258-4ab5-acca-3919d9a5d98d","indexed":true,"issn":null,"language":"en","license":"https://creativecommons.org/licenses/by/4.0/legalcode","mastodon":"","plan":"Starter","prefix":"10.59350","relative_url":null,"ror":null,"secure":true,"slug":"researchgraph","status":"active","title":"Research
|
43
|
+
Graph","updated_at":1706151454,"use_api":null,"use_mastodon":false,"user_id":"a7e16958-1175-437c-b839-d4b8a47ec811","version":"https://jsonfeed.org/version/1.1"},"blog_name":"Research
|
44
|
+
Graph","blog_slug":"researchgraph","content_text":"**Tools and Platform for
|
45
|
+
Integration of Knowledge Graph with RAG\npipelines.**\n\n<figure>\n<img\nsrc=\"https://cdn-images-1.medium.com/max/1024/1*bJ3eWZ7301vYDzBomwdLfQ.png\"\nalt=\"Complex
|
46
|
+
network connected to books and showing information from magespace\" />\n<figcaption>Image
|
47
|
+
Created in <a\nhref=\"https://www.mage.space/\">https://www.mage.space/</a></figcaption>\n</figure>\n\nAuthors:
|
48
|
+
[Aland\nAstudillo](https://www.linkedin.com/in/aland-astudillo/), [Aishwarya\nNambissan](https://www.linkedin.com/in/aishwarya-nambissan-127229200/)\n\nMany
|
49
|
+
users of chatbots such as ChatGPT, have encountered the problem of\nreceiving
|
50
|
+
inappropriate or incompatible responses. There are several\nreasons why this
|
51
|
+
might\u00a0happen.\n\nOne reason is the lack of appropriate training data,
|
52
|
+
as chatbots are\nusually trained on large amounts of text and code. If the
|
53
|
+
data is\ninsufficient or of poor quality, the chatbot may misunderstand queries\nand
|
54
|
+
provide inaccurate responses. Another reason is that some chatbots\nare designed
|
55
|
+
for specific tasks or domains, which limits their ability\nto handle broader
|
56
|
+
queries or understand subtle nuances in conversation.\nAdditionally, chatbots
|
57
|
+
may struggle with natural language, which is\ncomplex and often ambiguous.
|
58
|
+
This can cause them to misunderstand a\nuser''s query and provide irrelevant
|
59
|
+
or off-topic responses. Finally,\nthere are technical limitations, such as
|
60
|
+
the chatbot''s inability to\nreason or make inferences.\n\nThis article explores
|
61
|
+
a potential solution by combining two influential\napproaches in the field
|
62
|
+
of Natural Language Processing\u200a---\u200aRetrieval\nAugmented Generation
|
63
|
+
(**RAG**) and Knowledge Graphs(**KGs**). We will\ndelve into the partnership
|
64
|
+
between these two entities, discuss the\nnotable technologies and software
|
65
|
+
used in their processes, and highlight\nvarious options for utilizing their
|
66
|
+
combined potential.\n\n### **RAG**\n\nRetrieval-Augmented Generation is the
|
67
|
+
process of optimizing the output\nof a large language model using a knowledge
|
68
|
+
base outside of its training\ndata sources before generating a response. It
|
69
|
+
takes an input and\nretrieves a set of relevant/supporting documents given
|
70
|
+
a source (e.g.,\nWikipedia). This can be thought of as a Large Language Model
|
71
|
+
(LLM) not\njust putting words together, but carefully selecting relevant\ninformation
|
72
|
+
from external sources and Knowledge Graphs to create\nwell-informed and detailed
|
73
|
+
responses.\n\n### RAG Retrieval Techniques\n\nThe following are some crucial
|
74
|
+
technologies that enable RAG''s impressive\nability to retrieve and incorporate
|
75
|
+
relevant information:\n\n**Vector Search**: It transforms text into numerical
|
76
|
+
vectors, capturing\ntheir meaning and nuances in a mathematical space, creating
|
77
|
+
a map of\nrelationships. Similar texts, like those discussing shared topics
|
78
|
+
or\nusing similar language, end up positioned close together in this space,\nallowing
|
79
|
+
vector search to quickly identify them as related. This allows\nlightning-fast
|
80
|
+
comparisons, finding similar texts based on meaning, not\njust keywords.\n\nAlgorithms
|
81
|
+
like [**Faiss**](https://github.com/facebookresearch/faiss)\nand [**Annoy**](https://github.com/spotify/annoy)
|
82
|
+
map text into dense\nvectors, enabling fast comparisons and retrieval of relevant
|
83
|
+
passages\nbased on semantic similarity.\n\n**Passage Ranking**: It is an internal
|
84
|
+
algorithm that scores candidate\ntext passages based on their relevance to
|
85
|
+
a query. It considers factors\nlike keyword frequency, keyword overlap, and
|
86
|
+
document structure to act\nlike a judge, sifting through information to select
|
87
|
+
the most fitting and\ninformative passages.\n\nKeyword overlap measures how
|
88
|
+
often the same keywords appear in **both**\nthe query and the candidate passage,
|
89
|
+
emphasizing shared vocabulary and\npotential relevance. It differs from keyword
|
90
|
+
frequency, which simply\ncounts how often individual keywords appear within
|
91
|
+
a passage, regardless\nof their presence in the\u00a0query.\n\nTechniques
|
92
|
+
like [**BM25**](https://github.com/getalp/wikIR) and\n[**TF-IDF**](https://github.com/marcocor/wikipedia-idf)
|
93
|
+
score candidate\npassages based on keyword overlap and frequency, ensuring
|
94
|
+
retrieved\ninformation truly fits the\u00a0context.\n\n**Graph Neural Networks**
|
95
|
+
(**GNNs**): They are neural networks designed\nto explore and learn from interconnected
|
96
|
+
data like maps, social\nnetworks, and other complex relationships. Unlike
|
97
|
+
traditional processing\nmethods that go through data in a linear fashion,
|
98
|
+
GNNs are capable of\nrecognizing hidden patterns and understanding relationships
|
99
|
+
like \"who\nknows who\" and \"what connects to what\" by \"hopping\" across
|
100
|
+
connections\nin\u00a0data.\n\nConsider a graph as a network of dots(nodes)
|
101
|
+
connected by lines (edges).\nEach dot represents some information, like a
|
102
|
+
person, object, or concept.\nThe lines tell you how these things relate to
|
103
|
+
each\u00a0other.\n\nGNNs work in rounds. In each\u00a0round:\n\n1. Message
|
104
|
+
Passing: Each node \"talks\" to its neighbors, sending\n messages along
|
105
|
+
the edges. These messages contain information about\n the node itself and
|
106
|
+
its features.\n2. Node Update: Each node receives messages from all its neighbors
|
107
|
+
and\n combines them with its own information. This update can involve\n calculations
|
108
|
+
and applying a special function.\n3. Output Calculation: Based on the updated
|
109
|
+
information, the network\n calculates an output for each node. This output
|
110
|
+
could be a\n prediction about the node''s category, its relationship to
|
111
|
+
another\n node, or some other relevant information.\n\nThis process repeats
|
112
|
+
for multiple rounds, allowing nodes to incorporate\ninformation from their
|
113
|
+
entire neighborhood, not just their direct\nneighbors. As the rounds progress,
|
114
|
+
the network learns to understand the\nrelationships between nodes and the
|
115
|
+
overall structure of the\u00a0graph.\n\nWhen dealing with Knowledge Graphs,
|
116
|
+
frameworks like\n[**PyTorch-Geometric**](https://readthedocs.org/projects/pytorch-geometric/)\nand
|
117
|
+
[**DeepMind''s\nGNN**](https://github.com/deepmind/deepmind-research/blob/master/learning_to_simulate/graph_network.py)\nlibrary
|
118
|
+
come into play. These frameworks allow GNNs to traverse\ninterconnected entities
|
119
|
+
and relationships within the graph, retrieve\nrelevant knowledge fragments,
|
120
|
+
and understand complex connections.\n\n### **Knowledge Graphs: The Structured
|
121
|
+
Wisdom\u00a0Library**\n\nA knowledge graph, also referred to as a semantic
|
122
|
+
network, is a\nstructure that represents a network of real-world entities
|
123
|
+
such as\nobjects, events, situations, or concepts. It helps to illustrate
|
124
|
+
the\nconstantly changing representations of the world, connecting entities\n(such
|
125
|
+
as \"Marie Curie\") and relationships (such as \"won Nobel Prize\") to\nform
|
126
|
+
a complex network of information. This information is typically\nstored in
|
127
|
+
a graph database and visualized as a graph structure, thus the\nterm knowledge
|
128
|
+
\"graph\".\n\nKGs go beyond simply finding relevant facts and delve deeper
|
129
|
+
into\nunderstanding the relationships and insights hidden within using these\nprocesses:\n\n**Entity
|
130
|
+
Linking**: Imagine a vast network of information, like a big\npuzzle of dots.
|
131
|
+
Now imagine trying to connect specific names, places,\nand concepts to their
|
132
|
+
corresponding dots in the puzzle. That is what\nentity linking does with text
|
133
|
+
and knowledge graphs, connecting the\nspecific components of the text to the
|
134
|
+
corresponding nodes in the graph.\nThey help systems understand the exact
|
135
|
+
meaning of entities, and find\nrelevant information from the\u00a0graph.\n\nLibraries
|
136
|
+
like [**DGL-KeLP**](https://github.com/awslabs/dgl-ke)\nleverage GNNs to identify
|
137
|
+
and link named entities (like \"Marie Curie\")\nto their respective nodes
|
138
|
+
within the Knowledge Graphs, enabling RAG to\nretrieve information that is
|
139
|
+
directly relevant to the core subject of a\nsearch\u00a0query\n\n**Path Mining**:
|
140
|
+
Path mining is a process of uncovering hidden\nrelationships and patterns
|
141
|
+
that are not easily noticeable. It involves\nexploring complicated networks
|
142
|
+
of information and identifying and\ntracing connections between entities that
|
143
|
+
may seem unrelated. By doing\nso, path mining reveals surprising insights
|
144
|
+
and useful knowledge,\nimproving our understanding of the complex structures
|
145
|
+
within knowledge\ngraphs.\n\nTools like [**Neo4j**](https://neo4j.com/) and\n[**Stanza**](https://github.com/stanfordnlp/stanza)
|
146
|
+
allow traversing\npaths between entities, uncovering hidden relationships,
|
147
|
+
and generating\ninsightful responses based on this deeper understanding.\n\n**Reasoning
|
148
|
+
and Inference**: In the context of knowledge graphs,\nreasoning and inference
|
149
|
+
are not just limited to discovering facts; they\nare also concerned with utilizing
|
150
|
+
them effectively. This involves\nintegrating data, drawing meaningful connections,
|
151
|
+
and using logical\nreasoning to resolve issues, foresee future occurrences,
|
152
|
+
or even\nconstruct narratives leveraging the insights provided by the knowledge\ngraph.\n\nConsider
|
153
|
+
the scenario of trying to find an organization that works in\nspecific sectors
|
154
|
+
with the help of a knowledge graph. This analogy\neffectively highlights the
|
155
|
+
active role of reasoning and inference in\nknowledge graphs:\n\n1. Gathering
|
156
|
+
Facts: Knowledge graphs collect and organize information\n from various
|
157
|
+
sources, such as websites, databases, academic papers,\n and social media
|
158
|
+
platforms. These facts are represented as\n structured data, with entities
|
159
|
+
(e.g., organizations) and their\n attributes (e.g., sectors in which they
|
160
|
+
operate) forming nodes and\n edges in the graph. By combining data about
|
161
|
+
organizations and\n sectors, knowledge graphs enable the gathering of relevant
|
162
|
+
facts for\n analysis.\n2. Integrating information: By connecting an organization''s\n relationships
|
163
|
+
with specific sectors, such as partnerships,\n investments, or certifications,
|
164
|
+
knowledge graphs reveal the scope\n and relevance of their work within
|
165
|
+
those sectors. Links to related\n entities like employees, board members,
|
166
|
+
or projects can further\n contribute to understanding an organization''s
|
167
|
+
involvement in\n specific\u00a0sectors.\n3. Predicting and Creating: Knowledge
|
168
|
+
graphs can leverage machine\n learning and predictive models to infer missing
|
169
|
+
or hidden\n information. By analyzing the available facts and connections
|
170
|
+
within\n the graph, these models can predict an organization''s potential\n involvement
|
171
|
+
in sectors that have common attributes with their known\n areas of operation.
|
172
|
+
For example, if an organization has expertise in\n renewable energy, predictive
|
173
|
+
models could suggest their likely\n involvement in related sectors like
|
174
|
+
clean transportation or\n sustainable infrastructure. Additionally, knowledge
|
175
|
+
graphs\n facilitate the creation of new information and insights by combining\n existing
|
176
|
+
facts with external data sources. For instance, by\n integrating real-time
|
177
|
+
data on industry trends, market analysis, or\n news articles, knowledge
|
178
|
+
graphs enable the discovery of emerging\n sectors or upcoming organizations
|
179
|
+
that might align with the given\n parameters.\n\nA framework like [**Atomspace**](https://github.com/opencog/atomspace)\nfrom
|
180
|
+
[**OpenCog**](https://opencog.org/) empowers RAG to reason and\ninfer new
|
181
|
+
knowledge. By traversing paths and combining information from\ninterconnected
|
182
|
+
entities, the system can generate informed predictions or\nanswer hypothetical
|
183
|
+
questions.\n\n### Purpose\n\nThe combination of Retrieval-Augmented Generation
|
184
|
+
(RAG) and Knowledge\nGraphs (KG) is beneficial for several\u00a0reasons:\n\n1. **Enhanced
|
185
|
+
information retrieval**: Knowledge graphs provide\n structured and interconnected
|
186
|
+
information that can significantly\n improve the effectiveness of information
|
187
|
+
retrieval. By using KGs,\n RAG models can retrieve more accurate and relevant
|
188
|
+
information,\n leading to better generation and response\u00a0quality.\n2. **Reliable
|
189
|
+
and diverse information:** KGs are constructed from\n authoritative sources,
|
190
|
+
making them reliable and trustworthy sources\n of information. RAG models
|
191
|
+
can leverage this reliable information to\n generate more accurate responses.
|
192
|
+
Additionally, KGs help in\n diversifying the generated responses by providing
|
193
|
+
a broader pool of\n related facts and entities.\n3. **Context-aware understanding**:
|
194
|
+
KGs enable RAG models to understand\n and reason over the contextual information.
|
195
|
+
By leveraging the\n relationships and semantic connections encoded in KGs,
|
196
|
+
RAG models\n can better grasp the context of user queries or conversations,\n resulting
|
197
|
+
in more coherent and appropriate responses.\n4. **Handling complex queries**:
|
198
|
+
KGs allow RAG models to tackle complex\n queries by breaking them down
|
199
|
+
into smaller sub-queries, retrieving\n relevant pieces of information from
|
200
|
+
the KG, and then generating a\n response based on the retrieved knowledge.
|
201
|
+
This enables RAG models\n to handle a wide range of user queries effectively.\n5. **Explainability
|
202
|
+
and transparency**: KGs provide a transparent and\n interpretable representation
|
203
|
+
of knowledge. By integrating KG-based\n retrieval into RAG models, the
|
204
|
+
reasoning behind the generated\n responses becomes more explainable. Users
|
205
|
+
can have a clear\n understanding of the knowledge sources and connections
|
206
|
+
used to\n produce the response.\n6. **Scalability**: Knowledge graphs
|
207
|
+
act as large-scale repositories of\n information. RAG models can leverage
|
208
|
+
KGs to generate responses to\n various queries or conversations without
|
209
|
+
requiring additional\n supervised training data. This makes the RAG+KG
|
210
|
+
approach scalable to\n handle an extensive range of knowledge domains and
|
211
|
+
user\u00a0queries.\n\n### **Pipeline Possibilities: Orchestrating RAG and\u00a0KGs:**\n\nLet''s
|
212
|
+
explore some exciting pipeline options for harnessing the combined\npower
|
213
|
+
of RAG and Knowledge Graphs. There are two options in which either\nthe LLM
|
214
|
+
is prioritized or the Knowledge Graph is prioritized:\n\n**Option 1: LLM-Centric
|
215
|
+
Pipeline:**\n\nThe LLM-Centric pipeline is a RAG and Knowledge Graph combination
|
216
|
+
that\nempowers LLMs to craft well-informed responses. Here''s how it\u00a0works:\n\n1. Start
|
217
|
+
with the user''s question or statement\n2. The LLM (like GPT-3) generates
|
218
|
+
an initial draft response based on\n its internal knowledge. This draft
|
219
|
+
may lack specific factual details\n or nuances that a knowledge graph can\u00a0provide.\n3. RAG
|
220
|
+
kicks in, searching the text corpus or the Knowledge Graph for\n relevant
|
221
|
+
passages that enrich the draft. During the retrieval\n process, RAG retrieval
|
222
|
+
techniques are used to search not only text\n corpora but also knowledge
|
223
|
+
graphs to find relevant information. This\n means that RAG can directly
|
224
|
+
tap into the structured knowledge within\n the graph to retrieve facts,
|
225
|
+
relationships, and entities that align\n with the user''s query and the
|
226
|
+
LLM''s generated draft.\n4. The retrieved information is carefully fused
|
227
|
+
with the LLM''s output,\n creating a more factually accurate and insightful
|
228
|
+
response\n5. A final polishing step ensures the response is fluent, grammatically\n correct,
|
229
|
+
and ready to\u00a0show.\n\n<figure>\n<img\nsrc=\"https://cdn-images-1.medium.com/max/1024/0*3pd9MOIflkbS07wI\"
|
230
|
+
/>\n<figcaption>RAG LLM-centric generic\u00a0scheme.</figcaption>\n</figure>\n\nThe
|
231
|
+
basic steps to perform this\u00a0are:\n\n1. **Pre-processing**: Clean and
|
232
|
+
tokenize user input to prepare for\n processing.\n2. **LLM Generation**:
|
233
|
+
Generate an initial draft response using an LLM\n like [**GPT-3**](https://openai.com/product)
|
234
|
+
or [**Jurassic-1\n Jumbo**](https://www.livescience.com/google-sentient-ai-lamda-lemoine).\n3. **Retrieval**:
|
235
|
+
Employ RAG techniques to retrieve relevant passages\n from a text corpus
|
236
|
+
or Knowledge Graphs.\n4. **Fusion**: Integrate retrieved information into
|
237
|
+
the LLM-generated\n draft, creating a more informed and factually-grounded
|
238
|
+
response.\n5. **Post-processing**: Refine the final response for fluency,\n grammatical
|
239
|
+
correctness, and overall coherence.\n\n**Option 2: Knowledge Graphs-Centric
|
240
|
+
Pipeline:**\n\nIn this approach, knowledge graphs take center stage. In essence,
|
241
|
+
this\npipeline prioritizes the structured knowledge within knowledge graphs,\nusing
|
242
|
+
RAG retrieval techniques to translate those insights into\ncompelling and
|
243
|
+
informative language. Here''s how it\u00a0unfolds:\n\n1. User input: The
|
244
|
+
process begins with the user''s question or statement\n2. Graph exploration:
|
245
|
+
The knowledge graph is meticulously explored to\n identify relevant entities,
|
246
|
+
relationships, and paths that align with\n the user''s input. This stage
|
247
|
+
involves techniques like entity\n linking, path mining, and reasoning to
|
248
|
+
uncover valuable information\n within the\u00a0graph\n3. Response planning:
|
249
|
+
The insights extracted from the graph are used to\n create a structured
|
250
|
+
response plan. This plan outlines the key\n points, facts, and logical
|
251
|
+
flow that the final response\n should\u00a0embody\n4. Language generation:
|
252
|
+
This is where RAG steps in. Its purpose is to\n create human-like text
|
253
|
+
that follows the response plan. It uses LLMs\n to produce well-written
|
254
|
+
sentences and paragraphs, combining the\n relevant information from the
|
255
|
+
knowledge graph while maintaining\n cohesiveness and readability.\n5. Post-processing:
|
256
|
+
The generated response undergoes a final refinement\n process to ensure
|
257
|
+
grammatical correctness, clarity, and\n overall\u00a0quality\n\n<figure>\n<img\nsrc=\"https://cdn-images-1.medium.com/max/1024/0*mZ83esKBjbPmCq_C\"
|
258
|
+
/>\n<figcaption>RAG Knowledge Graph-centric generic\u00a0scheme.</figcaption>\n</figure>\n\nThe
|
259
|
+
basic steps\u00a0are:\n\n1. **Query Formulation**: Transform the user input
|
260
|
+
into a query\n suitable for Knowledge Graph''s exploration.\n2. **Knowledge
|
261
|
+
Graphs:** You can use either Neo4j or\n [NebulaGraph](https://www.nebula-graph.io/)
|
262
|
+
to implement a retrieval\n enhancement technique. This technique involves
|
263
|
+
utilizing a knowledge\n graph to illustrate the connections between entities
|
264
|
+
and\n relationships. Additionally, it incorporates a powerful language\n model
|
265
|
+
to improve the retrieval process.\n3. **Fact Selection**: Employ entity linking
|
266
|
+
and reasoning algorithms\n to select and prioritize the most relevant facts
|
267
|
+
based on the query\n and\u00a0context.\n4. **Natural Language Generation**
|
268
|
+
(**NLG**): Utilise specialized NLG\n models like\n [BART](https://research.facebook.com/publications/controllable-abstractive-summarization/)\n to
|
269
|
+
translate the extracted facts into a natural language response.\n5. **Refinement**:
|
270
|
+
Enhance the generated response for clarity and\n coherence.\n\n### **Unveiling
|
271
|
+
a Future of Intelligent Interaction**\n\nThe combination of RAG and Knowledge
|
272
|
+
Graphs goes beyond just being a\ntechnological fusion. It paves the way for
|
273
|
+
a future where the\ninteraction between humans and computers goes beyond simple
|
274
|
+
words and\nbecomes a more informed and refined form of communication. As these\ntechnologies
|
275
|
+
continue to develop, we can expect to witness a significant\ntransformation
|
276
|
+
in:\n\n- AI-powered assistants that answer your questions with the confidence\n of
|
277
|
+
a well-read friend, seamlessly combining relevant facts and\n insights gleaned
|
278
|
+
from Knowledge Graphs.\n- Next-generation search engines that go beyond keyword
|
279
|
+
matching,\n understanding the deeper meaning behind your queries and delivering\n comprehensive,
|
280
|
+
contextual results enriched with information from\n Knowledge Graphs.\n-
|
281
|
+
Creative writing tools that utilize RAG and Knowledge Graphs to\n generate
|
282
|
+
stories that are both factually accurate and full of\n unexpected plot twists
|
283
|
+
and character development, moving beyond\n clich\u00e9d patterns.\n\n###
|
284
|
+
**Conclusion**\n\nThe convergence of Retrieval Augmented Generation (RAG)
|
285
|
+
and Knowledge\nGraphs (KGs) brings about an exciting synergy in the world
|
286
|
+
of Natural\nLanguage Processing (NLP). RAG enhances the output of large language\nmodels
|
287
|
+
by carefully selecting relevant information from external sources\nand KGs,
|
288
|
+
allowing for well-informed and detailed responses. KGs, on the\nother hand,
|
289
|
+
provide a structured representation of real-world entities\nand their relationships,
|
290
|
+
enabling the exploration of hidden insights and\nthe discovery of complex
|
291
|
+
connections.\n\nThe integration of RAG and KGs opens up two pipeline possibilities.
|
292
|
+
The\nLLM-centric pipeline prioritizes the language model''s output, which
|
293
|
+
is\nthen enriched with information retrieved from KGs. The Knowledge\nGraphs-centric
|
294
|
+
pipeline, on the other hand, places KGs at the center,\nutilizing RAG techniques
|
295
|
+
to translate the structured insights into\ncompelling and informative language.\n\nWhile
|
296
|
+
integrating LLMs and a knowledge graph for content retrieval\nrequires careful
|
297
|
+
planning, the reward is significant. You can gain\naccess to hidden relationships
|
298
|
+
within information, ultimately leading to\nhigher-quality output information.\n\nTools
|
299
|
+
like **OpenAI**, **Langchain**, and **LlamaIndex** provide\nready-made pipelines
|
300
|
+
to integrate knowledge graphs (like **Neo4j**)\neasily. Meanwhile, open-source
|
301
|
+
LLMs like **Mistral**, **Llama**, and\n**Dolphin** are catching up to proprietary
|
302
|
+
models in performance, making\nthem attractive choices for building custom
|
303
|
+
architectures. This\nopen-source scenario allows for the exploration and examination
|
304
|
+
of\nvarious methods before fully committing to a particular technological\nframework.
|
305
|
+
So, it is crucial to evaluate your needs and choose the\napproach that best
|
306
|
+
fits your use\u00a0case.\n\n![](https://medium.com/_/stat?event=post.clientViewed&referrerSource=full_rss&postId=fc0a6900f7eb){width=\"1\"\nheight=\"1\"}\n","doi":"https://doi.org/10.59350/jhrs4-22440","guid":"https://medium.com/p/fc0a6900f7eb","id":"05f01f68-ef81-47d7-a3c1-40aba91d358f","image":"https://cdn-images-1.medium.com/max/1024/1*bJ3eWZ7301vYDzBomwdLfQ.png","indexed_at":1706690571,"language":"en","published_at":1705557796,"reference":[],"relationships":[],"summary":"<strong>\n
|
307
|
+
Tools and Platform for Integration of Knowledge Graph with RAG pipelines.\n</strong>\nAuthors:
|
308
|
+
Aland Astudillo, Aishwarya Nambissan Many users of chatbots such as ChatGPT,
|
309
|
+
have encountered the problem of receiving inappropriate or incompatible responses.
|
310
|
+
There are several reasons why this might\u00a0happen. One reason is the lack
|
311
|
+
of appropriate training data, as chatbots are usually trained on large amounts
|
312
|
+
of text and code.","tags":["Artificial-intelligence","Machine-learning","Retrieval-augmented","Knowledge-graph"],"title":"Unveiling
|
313
|
+
the Synergy: Retrieval Augmented Generation (RAG) Meets Knowledge Graphs","updated_at":1705557796,"url":"https://medium.com/@researchgraph/unveiling-the-synergy-retrieval-augmented-generation-rag-meets-knowledge-graphs-fc0a6900f7eb"}
|
314
|
+
|
315
|
+
'
|
316
|
+
recorded_at: Wed, 31 Jan 2024 19:50:01 GMT
|
317
|
+
recorded_with: VCR 6.2.0
|
@@ -10,7 +10,7 @@ describe Commonmeta::Metadata, vcr: true do
|
|
10
10
|
context 'read commonmeta metadata' do
|
11
11
|
it "default" do
|
12
12
|
expect(subject.valid?).to be true
|
13
|
-
expect(subject.schema_version).to eq("https://commonmeta.org/commonmeta_v0.10
|
13
|
+
expect(subject.schema_version).to eq("https://commonmeta.org/commonmeta_v0.10")
|
14
14
|
expect(subject.id).to eq("https://doi.org/10.7554/elife.01567")
|
15
15
|
expect(subject.type).to eq("JournalArticle")
|
16
16
|
expect(subject.url).to eq("https://elifesciences.org/articles/01567")
|
@@ -36,13 +36,12 @@ describe Commonmeta::Metadata, vcr: true do
|
|
36
36
|
# expect(subject.valid?).to be true
|
37
37
|
expect(subject.id).to eq("https://doi.org/10.5063/f1m61h5x")
|
38
38
|
expect(subject.type).to eq("Software")
|
39
|
-
expect(subject.contributors).to eq([{"contributorRoles"=>["Author"],
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
expect(subject.
|
44
|
-
expect(subject.
|
45
|
-
expect(subject.publisher).to eq("name"=>"KNB Data Repository")
|
39
|
+
expect(subject.contributors).to eq([{ "contributorRoles" => ["Author"],
|
40
|
+
"name" => "Jones, Matthew B.; Slaughter, Peter; Nahf, Rob; Boettiger, Carl ; Jones, Chris; Read, Jordan; Walker, Lauren; Hart, Edmund; Chamberlain, Scott",
|
41
|
+
"type" => "Organization" }])
|
42
|
+
expect(subject.titles).to eq([{ "title" => "dataone: R interface to the DataONE network of data repositories" }])
|
43
|
+
expect(subject.date).to eq("created" => "2016-03-12", "published" => "2016", "registered" => "2016-03-12", "updated" => "2020-09-18")
|
44
|
+
expect(subject.publisher).to eq("name" => "KNB Data Repository")
|
46
45
|
expect(subject.provider).to eq("DataCite")
|
47
46
|
end
|
48
47
|
|
@@ -70,8 +69,8 @@ describe Commonmeta::Metadata, vcr: true do
|
|
70
69
|
expect(subject.type).to eq("Dissertation")
|
71
70
|
expect(subject.contributors.length).to eq(3)
|
72
71
|
expect(subject.contributors.first).to eq("type" => "Person", "contributorRoles" => ["Author"],
|
73
|
-
|
74
|
-
expect(subject.contributors.last).to eq("id"=>"https://orcid.org/0000-0002-8633-8234", "type"=>"Person", "contributorRoles"=>["Supervision"], "givenName"=>"Gerhard", "familyName"=>"Gruebel", "affiliation"=>[{"name"=>"Deutsches Elektronen-Synchrotron"}])
|
72
|
+
"givenName" => "Heiko", "familyName" => "Conrad")
|
73
|
+
expect(subject.contributors.last).to eq("id" => "https://orcid.org/0000-0002-8633-8234", "type" => "Person", "contributorRoles" => ["Supervision"], "givenName" => "Gerhard", "familyName" => "Gruebel", "affiliation" => [{ "name" => "Deutsches Elektronen-Synchrotron" }])
|
75
74
|
expect(subject.titles).to eq([{ "title" => "Dynamics of colloids in molecular glass forming liquids studied via X-ray photon correlation spectroscopy" }])
|
76
75
|
expect(subject.date).to eq("created" => "2018-01-25", "published" => "2014",
|
77
76
|
"registered" => "2018-01-25", "updated" => "2020-09-19")
|
@@ -91,7 +90,7 @@ describe Commonmeta::Metadata, vcr: true do
|
|
91
90
|
"affiliation" => [{ "name" => "Тверская государственная сельскохозяйственная академия" }], "familyName" => "Ганичева", "givenName" => "А.В.", "type" => "Person", "contributorRoles" => ["Author"],
|
92
91
|
)
|
93
92
|
expect(subject.titles.last).to eq("title" => "MODEL OF SYSTEM DYNAMICS OF PROCESS OF TRAINING",
|
94
|
-
"
|
93
|
+
"type" => "TranslatedTitle")
|
95
94
|
expect(subject.date).to eq("created" => "2019-02-12", "published" => "2019",
|
96
95
|
"registered" => "2019-02-12", "updated" => "2022-08-23")
|
97
96
|
expect(subject.publisher).to eq("name" => "МОДЕЛИРОВАНИЕ, ОПТИМИЗАЦИЯ И ИНФОРМАЦИОННЫЕ ТЕХНОЛОГИИ")
|
@@ -115,10 +114,14 @@ describe Commonmeta::Metadata, vcr: true do
|
|
115
114
|
expect(subject.contributors.first).to eq(
|
116
115
|
"name" => "Europäische Kommission", "contributorRoles" => ["Author"], "type" => "Organization",
|
117
116
|
)
|
118
|
-
expect(subject.titles).to eq([
|
119
|
-
|
120
|
-
|
121
|
-
|
117
|
+
expect(subject.titles).to eq([{ "language" => "de", "title" => "Flash Eurobarometer 54 (Madrid Summit)" },
|
118
|
+
{ "language" => "en", "title" => "Flash Eurobarometer 54 (Madrid Summit)" },
|
119
|
+
{ "language" => "de",
|
120
|
+
"title" => "The Common European Currency",
|
121
|
+
"type" => "Subtitle" },
|
122
|
+
{ "language" => "en",
|
123
|
+
"title" => "The Common European Currency",
|
124
|
+
"type" => "Subtitle" }])
|
122
125
|
expect(subject.subjects).to eq([{ "lang" => "en",
|
123
126
|
"subject" => "KAT12 International Institutions, Relations, Conditions",
|
124
127
|
"subjectScheme" => "ZA" },
|
@@ -155,5 +158,56 @@ describe Commonmeta::Metadata, vcr: true do
|
|
155
158
|
expect(subject.license).to eq("id" => "CC-BY-4.0",
|
156
159
|
"url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
|
157
160
|
end
|
161
|
+
|
162
|
+
it "dataset schema v4.5" do
|
163
|
+
input = "#{fixture_path}datacite-dataset_v4.5.json"
|
164
|
+
subject = described_class.new(input: input)
|
165
|
+
expect(subject.id).to eq("https://doi.org/10.82433/b09z-4k37")
|
166
|
+
expect(subject.type).to eq("Dataset")
|
167
|
+
expect(subject.contributors.length).to eq(23)
|
168
|
+
expect(subject.contributors[0]).to eq("contributorRoles" => ["Author"], "familyName" => "ExampleFamilyName", "givenName" => "ExampleGivenName", "type" => "Person")
|
169
|
+
expect(subject.contributors[2]).to eq("contributorRoles" => ["ContactPerson"], "familyName" => "ExampleFamilyName", "givenName" => "ExampleGivenName", "type" => "Person")
|
170
|
+
expect(subject.date).to eq("created" => "2022-10-27", "published" => "2022", "registered" => "2022-10-27", "updated" => "2024-01-02")
|
171
|
+
expect(subject.publisher).to eq("name" => "Example Publisher")
|
172
|
+
expect(subject.titles).to eq([{ "language" => "en", "title" => "Example Title" },
|
173
|
+
{ "language" => "en", "title" => "Example Subtitle", "type" => "Subtitle" },
|
174
|
+
{ "language" => "fr",
|
175
|
+
"title" => "Example TranslatedTitle",
|
176
|
+
"type" => "TranslatedTitle" },
|
177
|
+
{ "language" => "en",
|
178
|
+
"title" => "Example AlternativeTitle",
|
179
|
+
"type" => "AlternativeTitle" }])
|
180
|
+
expect(subject.descriptions).to eq([{ "description" => "Example Abstract",
|
181
|
+
"type" => "Abstract",
|
182
|
+
"language" => "en" },
|
183
|
+
{ "description" => "Example Methods",
|
184
|
+
"type" => "Methods",
|
185
|
+
"language" => "en" },
|
186
|
+
{ "description" => "Example SeriesInformation",
|
187
|
+
"type" => "Other",
|
188
|
+
"language" => "en" },
|
189
|
+
{ "description" => "Example TableOfContents",
|
190
|
+
"type" => "Other",
|
191
|
+
"language" => "en" },
|
192
|
+
{ "description" => "Example TechnicalInfo",
|
193
|
+
"type" => "TechnicalInfo",
|
194
|
+
"language" => "en" },
|
195
|
+
{ "description" => "Example Other", "type" => "Other", "language" => "en" }])
|
196
|
+
expect(subject.license).to eq("id" => "CC-PDDC", "url" => "https://creativecommons.org/licenses/publicdomain/")
|
197
|
+
end
|
198
|
+
|
199
|
+
it "instrument" do
|
200
|
+
input = "#{fixture_path}datacite-instrument.json"
|
201
|
+
subject = described_class.new(input: input)
|
202
|
+
puts subject.errors unless subject.valid?
|
203
|
+
expect(subject.valid?).to be true
|
204
|
+
expect(subject.id).to eq("https://doi.org/10.82433/08qf-ee96")
|
205
|
+
expect(subject.type).to eq("Instrument")
|
206
|
+
expect(subject.contributors.length).to eq(2)
|
207
|
+
expect(subject.contributors.first).to eq("contributorRoles" => ["Author"], "name" => "DECTRIS", "type" => "Organization", "id" => "https://www.wikidata.org/wiki/Q107529885")
|
208
|
+
expect(subject.date).to eq("created" => "2022-10-20", "published" => "2022", "registered" => "2022-10-20", "updated" => "2024-01-02")
|
209
|
+
expect(subject.publisher).to eq("name" => "Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences")
|
210
|
+
expect(subject.license).to be_nil
|
211
|
+
end
|
158
212
|
end
|
159
213
|
end
|
@@ -189,6 +189,31 @@ describe Commonmeta::Metadata, vcr: true do
|
|
189
189
|
expect(subject.references).to be_nil
|
190
190
|
end
|
191
191
|
|
192
|
+
it "medium post with institutional author" do
|
193
|
+
input = "https://api.rogue-scholar.org/posts/05f01f68-ef81-47d7-a3c1-40aba91d358f"
|
194
|
+
subject = described_class.new(input: input)
|
195
|
+
# expect(subject.valid?).to be true
|
196
|
+
expect(subject.id).to eq("https://doi.org/10.59350/jhrs4-22440")
|
197
|
+
expect(subject.url).to eq("https://medium.com/@researchgraph/unveiling-the-synergy-retrieval-augmented-generation-rag-meets-knowledge-graphs-fc0a6900f7eb")
|
198
|
+
expect(subject.alternate_identifiers).to eq([{ "alternateIdentifier" => "05f01f68-ef81-47d7-a3c1-40aba91d358f", "alternateIdentifierType" => "UUID" }])
|
199
|
+
expect(subject.type).to eq("Article")
|
200
|
+
expect(subject.contributors.length).to eq(1)
|
201
|
+
expect(subject.contributors.first).to eq("contributorRoles"=>["Author"], "name"=>"Research Graph", "type"=>"Organization")
|
202
|
+
expect(subject.titles).to eq([{ "title" => "Unveiling the Synergy: Retrieval Augmented Generation (RAG) Meets Knowledge Graphs" }])
|
203
|
+
expect(subject.license).to eq("id" => "CC-BY-4.0",
|
204
|
+
"url" => "https://creativecommons.org/licenses/by/4.0/legalcode")
|
205
|
+
expect(subject.date).to eq("published"=>"2024-01-18", "updated"=>"2024-01-18")
|
206
|
+
expect(subject.descriptions.first["description"]).to start_with("<strong> Tools and Platform for Integration of Knowledge Graph with RAG pipelines.")
|
207
|
+
expect(subject.publisher).to eq("name" => "Research Graph")
|
208
|
+
expect(subject.subjects).to eq([{ "subject" => "Computer and information sciences" },
|
209
|
+
{ "schemeUri" => "http://www.oecd.org/science/inno/38235147.pdf",
|
210
|
+
"subject" => "FOS: Computer and information sciences",
|
211
|
+
"subjectScheme" => "Fields of Science and Technology (FOS)" }])
|
212
|
+
expect(subject.language).to eq("en")
|
213
|
+
expect(subject.container).to eq("identifier" => "https://medium.com/@researchgraph", "identifierType" => "URL", "title" => "Research Graph", "type" => "Periodical")
|
214
|
+
expect(subject.references).to be_nil
|
215
|
+
end
|
216
|
+
|
192
217
|
it "syldavia gazette post with references" do
|
193
218
|
input = "https://api.rogue-scholar.org/posts/0022b9ef-525a-4a79-81ad-13411697f58a"
|
194
219
|
subject = described_class.new(input: input)
|
data/spec/utils_spec.rb
CHANGED
@@ -502,7 +502,7 @@ describe Commonmeta::Metadata do
|
|
502
502
|
links = [{ "rel" => "self", "type" => "application/atom+xml", "href" => "https://syldavia-gazette.org/atom/" },
|
503
503
|
{ "rel" => "alternate", "type" => "text/html", "href" => "https://syldavia-gazette.org" },
|
504
504
|
{ "rel" => "license", "type" => "text/html", "href" => "https://creativecommons.org/licenses/by/4.0/legalcode" }]
|
505
|
-
|
505
|
+
|
506
506
|
it "url" do
|
507
507
|
response = subject.get_link(links, "self")
|
508
508
|
expect(response).to eq("https://syldavia-gazette.org/atom/")
|
@@ -721,7 +721,7 @@ describe Commonmeta::Metadata do
|
|
721
721
|
it "decode doi to uuid" do
|
722
722
|
doi = "https://doi.org/10.53731/6315bn4-aqg82ja-4a9wxdt-29f7279"
|
723
723
|
response = subject.decode_doi(doi, uuid: true)
|
724
|
-
expect(response).to eq(
|
724
|
+
expect(response).to eq("255d48ab-c102-9288-a4f3-add092f388e9")
|
725
725
|
end
|
726
726
|
end
|
727
727
|
|
@@ -745,10 +745,36 @@ describe Commonmeta::Metadata do
|
|
745
745
|
end
|
746
746
|
end
|
747
747
|
|
748
|
-
context
|
749
|
-
it
|
748
|
+
context "json_feed_unregistered_url" do
|
749
|
+
it "all posts" do
|
750
750
|
response = subject.json_feed_unregistered_url
|
751
751
|
expect(response).to eq("https://api.rogue-scholar.org/posts/unregistered")
|
752
752
|
end
|
753
753
|
end
|
754
|
+
|
755
|
+
context "normalize_name_identifier" do
|
756
|
+
it "ORCID" do
|
757
|
+
hsh = {"schemeUri"=>"https://orcid.org", "nameIdentifier"=>"https://orcid.org/0000-0003-1419-2405", "nameIdentifierScheme"=>"ORCID"}
|
758
|
+
response = subject.normalize_name_identifier(hsh)
|
759
|
+
expect(response).to eq("https://orcid.org/0000-0003-1419-2405")
|
760
|
+
end
|
761
|
+
|
762
|
+
it "ROR" do
|
763
|
+
hsh = { "schemeUri" => "https://ror.org", "nameIdentifier" => "https://ror.org/02aj13c28", "nameIdentifierScheme" => "ROR" }
|
764
|
+
response = subject.normalize_name_identifier(hsh)
|
765
|
+
expect(response).to eq("https://ror.org/02aj13c28")
|
766
|
+
end
|
767
|
+
|
768
|
+
it "ISNI" do
|
769
|
+
hsh = { "schemeUri" => "http://isni.org/isni/", "nameIdentifier" => "0000000134596520", "nameIdentifierScheme" => "ISNI" }
|
770
|
+
response = subject.normalize_name_identifier(hsh)
|
771
|
+
expect(response).to eq("https://isni.org/isni/0000000134596520")
|
772
|
+
end
|
773
|
+
|
774
|
+
it "Wikidata" do
|
775
|
+
hsh = {"schemeUri"=>"https://www.wikidata.org/wiki/", "nameIdentifier"=>"Q107529885", "nameIdentifierScheme"=>"Wikidata"}
|
776
|
+
response = subject.normalize_name_identifier(hsh)
|
777
|
+
expect(response).to eq("https://www.wikidata.org/wiki/Q107529885")
|
778
|
+
end
|
779
|
+
end
|
754
780
|
end
|
@@ -33,11 +33,38 @@ describe Commonmeta::Metadata, vcr: true do
|
|
33
33
|
"volume" => "426",
|
34
34
|
"firstPage" => "181",
|
35
35
|
"containerTitle" => "Nature")
|
36
|
-
expect(json["date"]).to eq("published"=>"2014-02-11", "updated"=>"2022-03-26")
|
36
|
+
expect(json["date"]).to eq("published" => "2014-02-11", "updated" => "2022-03-26")
|
37
37
|
expect(json["descriptions"].first["description"]).to start_with("Among various advantages,")
|
38
|
-
expect(json["license"]).to eq("id"=>"CC-BY-3.0", "url"=>"https://creativecommons.org/licenses/by/3.0/legalcode")
|
38
|
+
expect(json["license"]).to eq("id" => "CC-BY-3.0", "url" => "https://creativecommons.org/licenses/by/3.0/legalcode")
|
39
39
|
expect(json["provider"]).to eq("Crossref")
|
40
|
-
expect(json["files"].first).to eq("mimeType"=>"application/pdf", "url"=>"https://cdn.elifesciences.org/articles/01567/elife-01567-v1.pdf")
|
40
|
+
expect(json["files"].first).to eq("mimeType" => "application/pdf", "url" => "https://cdn.elifesciences.org/articles/01567/elife-01567-v1.pdf")
|
41
|
+
end
|
42
|
+
|
43
|
+
it "dataset schema v4.5" do
|
44
|
+
input = "#{fixture_path}datacite-dataset_v4.5.json"
|
45
|
+
subject = described_class.new(input: input)
|
46
|
+
expect(subject.id).to eq("https://doi.org/10.82433/b09z-4k37")
|
47
|
+
json = JSON.parse(subject.commonmeta)
|
48
|
+
expect(json["id"]).to eq("https://doi.org/10.82433/b09z-4k37")
|
49
|
+
expect(json["type"]).to eq("Dataset")
|
50
|
+
expect(json["titles"]).to eq([{ "language" => "en", "title" => "Example Title" },
|
51
|
+
{ "language" => "en", "title" => "Example Subtitle", "type" => "Subtitle" },
|
52
|
+
{ "language" => "fr",
|
53
|
+
"title" => "Example TranslatedTitle",
|
54
|
+
"type" => "TranslatedTitle" },
|
55
|
+
{ "language" => "en",
|
56
|
+
"title" => "Example AlternativeTitle",
|
57
|
+
"type" => "AlternativeTitle" }])
|
58
|
+
expect(json["descriptions"]).to eq([{ "description" => "Example Abstract", "language" => "en", "type" => "Abstract" },
|
59
|
+
{ "description" => "Example Methods", "language" => "en", "type" => "Methods" },
|
60
|
+
{ "description" => "Example SeriesInformation",
|
61
|
+
"language" => "en",
|
62
|
+
"type" => "Other" },
|
63
|
+
{ "description" => "Example TableOfContents", "language" => "en", "type" => "Other" },
|
64
|
+
{ "description" => "Example TechnicalInfo",
|
65
|
+
"language" => "en",
|
66
|
+
"type" => "TechnicalInfo" },
|
67
|
+
{ "description" => "Example Other", "language" => "en", "type" => "Other" }])
|
41
68
|
end
|
42
69
|
end
|
43
70
|
end
|
@@ -7,6 +7,7 @@ describe Commonmeta::Metadata, vcr: true do
|
|
7
7
|
it 'Dataset' do
|
8
8
|
input = 'https://doi.org/10.5061/DRYAD.8515'
|
9
9
|
subject = described_class.new(input: input, from: 'datacite')
|
10
|
+
puts subject.errors unless subject.valid?
|
10
11
|
expect(subject.valid?).to be true
|
11
12
|
json = JSON.parse(subject.csl)
|
12
13
|
expect(json['type']).to eq('dataset')
|
@@ -37,6 +37,7 @@ describe Commonmeta::Metadata, vcr: true do
|
|
37
37
|
it 'text' do
|
38
38
|
input = 'https://doi.org/10.3204/desy-2014-01645'
|
39
39
|
subject = described_class.new(input: input, from: 'datacite')
|
40
|
+
puts subject.errors unless subject.valid?
|
40
41
|
expect(subject.valid?).to be true
|
41
42
|
csv = subject.csv.parse_csv
|
42
43
|
|
@@ -136,7 +136,6 @@ describe Commonmeta::Metadata, vcr: true do
|
|
136
136
|
it 'from schema.org' do
|
137
137
|
input = 'https://blog.front-matter.io/posts/eating-your-own-dog-food/'
|
138
138
|
subject = described_class.new(input: input, from: 'schema_org')
|
139
|
-
puts subject.errors
|
140
139
|
expect(subject.valid?).to be true
|
141
140
|
datacite = JSON.parse(subject.datacite)
|
142
141
|
expect(datacite.fetch('titles')).to eq([{ 'title' => 'Eating your own Dog Food' }])
|