commonmeta-ruby 3.3.15 → 3.3.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/commonmeta/readers/json_feed_reader.rb +8 -8
- data/lib/commonmeta/utils.rb +14 -0
- data/lib/commonmeta/version.rb +1 -1
- data/spec/cli_spec.rb +2 -2
- data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/json_feed/json_feed_by_blog.yml +189 -1704
- data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/json_feed/json_feed_unregistered.yml +54 -1969
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed/by_blog_id.yml +210 -518
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed/not_indexed_posts.yml +6 -1450
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed/unregistered_posts.yml +54 -8
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/blog_post_with_non-url_id.yml +18 -88
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/blogger_post.yml +11 -50
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/ghost_post_with_author_name_suffix.yml +64 -173
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/ghost_post_with_doi.yml +13 -75
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/ghost_post_with_institutional_author.yml +15 -11
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/ghost_post_with_organizational_author.yml +15 -46
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/ghost_post_without_doi.yml +17 -155
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/jekyll_post.yml +14 -45
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/jekyll_post_with_anonymous_author.yml +13 -16
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/substack_post_with_broken_reference.yml +938 -1865
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/syldavia_gazette_post_with_references.yml +128 -267
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/upstream_post_with_references.yml +570 -1279
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/wordpress_post.yml +12 -119
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/wordpress_post_with_many_references.yml +4095 -5759
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/wordpress_post_with_references.yml +24 -220
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/wordpress_post_with_tracking_code_on_url.yml +13 -12
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_feed_item_from_rogue_scholar_with_anonymous_author.yml +13 -16
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_feed_item_from_rogue_scholar_with_doi.yml +12 -119
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_feed_item_from_rogue_scholar_with_organizational_author.yml +15 -46
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_feed_item_from_upstream_blog.yml +13 -201
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_feed_item_with_references.yml +104 -813
- data/spec/readers/json_feed_reader_spec.rb +22 -24
- data/spec/utils_spec.rb +40 -0
- data/spec/writers/crossref_xml_writer_spec.rb +4 -4
- metadata +2 -5
- data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/json_feed/blog_post.yml +0 -360
- data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/json_feed/blog_post_uuid.yml +0 -980
- data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/json_feed_unregistered/blog_post_uuid.yml +0 -49
@@ -23,23 +23,23 @@ http_interactions:
|
|
23
23
|
Cache-Control:
|
24
24
|
- public, max-age=0, must-revalidate
|
25
25
|
Content-Length:
|
26
|
-
- '
|
26
|
+
- '26135'
|
27
27
|
Content-Type:
|
28
28
|
- application/json; charset=utf-8
|
29
29
|
Date:
|
30
|
-
-
|
30
|
+
- Mon, 10 Jul 2023 20:41:57 GMT
|
31
31
|
Etag:
|
32
|
-
- '"
|
32
|
+
- '"a986ggx9d0k3r"'
|
33
33
|
Server:
|
34
34
|
- Vercel
|
35
35
|
Strict-Transport-Security:
|
36
36
|
- max-age=63072000
|
37
37
|
X-Matched-Path:
|
38
|
-
- "/api/blogs/[
|
38
|
+
- "/api/blogs/[[...params]]"
|
39
39
|
X-Vercel-Cache:
|
40
40
|
- MISS
|
41
41
|
X-Vercel-Id:
|
42
|
-
- fra1::iad1::
|
42
|
+
- fra1::iad1::d4g88-1689021716969-a7ff58d7e07e
|
43
43
|
Connection:
|
44
44
|
- close
|
45
45
|
body:
|
@@ -49,55 +49,17 @@ http_interactions:
|
|
49
49
|
more ranty and less considered opinions, see my <a href=\"https://twitter.com/rdmpage\">Twitter
|
50
50
|
feed</a>.<br>ISSN 2051-8188. Written content on this site is licensed under
|
51
51
|
a <a href=\"https://creativecommons.org/licenses/by/4.0/\">Creative Commons
|
52
|
-
Attribution 4.0 International license</a>.","language":"en","favicon":null,"feed_url":"https://iphylo.blogspot.com/feeds/posts/default","feed_format":"application/atom+xml","home_page_url":"https://iphylo.blogspot.com/","indexed_at":"2023-02-06","modified_at":"2023-
|
53
|
-
7.00","category":"Natural Sciences","backlog":true,"prefix":"10.59350","items":[{"id":"https://doi.org/10.59350/
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
about this</a> but want to bookmark it for later as well. The paper “A molecular-based
|
64
|
-
identification resource for the arthropods of Finland” <a href=\"https://doi.org/10.1111/1755-0998.13510\">doi:10.1111/1755-0998.13510</a>
|
65
|
-
contains the following:</p>\n<blockquote>\n<p>…the annotated barcode records
|
66
|
-
assembled by FinBOL participants represent a tremendous <mark>intergenerational
|
67
|
-
transfer of taxonomic knowledge</mark> … the time contributed by current taxonomists
|
68
|
-
in identifying and contributing voucher specimens represents a great gift
|
69
|
-
to future generations who will benefit from their expertise when they are
|
70
|
-
no longer able to process new material.</p>\n</blockquote>\n<p>I think this
|
71
|
-
is a very clever way to characterise the project. In an age of machine learning
|
72
|
-
this may be commonest way to share knowledge , namely as expert-labelled training
|
73
|
-
data used to build tools for others. Of course, this means the expertise itself
|
74
|
-
may be lost, which has implications for updating the models if the data isn’t
|
75
|
-
complete. But it speaks to Charles Godfrey’s theme of <a href=\"https://biostor.org/reference/250587\">“Taxonomy
|
76
|
-
as information science”</a>.</p>\n<p>Note that the knowledge is also transformed
|
77
|
-
in the sense that the underlying expertise of interpreting morphology, ecology,
|
78
|
-
behaviour, genomics, and the past literature is not what is being passed on.
|
79
|
-
Instead it is probabilities that a DNA sequence belongs to a particular taxon.</p>\n<p>This
|
80
|
-
feels is different to, say iNaturalist, where there is a machine learning
|
81
|
-
model to identify images. In that case, the model is built on something the
|
82
|
-
community itself has created, and continues to create. Yes, the underlying
|
83
|
-
idea is that same: “experts” have labelled the data, a model is trained, the
|
84
|
-
model is used. But the benefits of the <a href=\"https://www.inaturalist.org\">iNaturalist</a>
|
85
|
-
model are immediately applicable to the people whose data built the model.
|
86
|
-
In the case of barcoding, because the technology itself is still not in the
|
87
|
-
hands of many (relative to, say, digital imaging), the benefits are perhaps
|
88
|
-
less tangible. Obviously researchers working with environmental DNA will find
|
89
|
-
it very useful, but broader impact may await the arrival of citizen science
|
90
|
-
DNA barcoding.</p>\n<p>The other consideration is whether the barcoding helps
|
91
|
-
taxonomists. Is it to be used to help prioritise future work (“we are getting
|
92
|
-
lots of unknown sequences in these taxa, lets do some taxonomy there”), or
|
93
|
-
is it simply capturing the knowledge of a generation that won’t be replaced:</p>\n<blockquote>\n<p>The
|
94
|
-
need to capture such knowledge is essential because there are, for example,
|
95
|
-
no young Finnish taxonomists who can critically identify species in many key
|
96
|
-
groups of ar- thropods (e.g., aphids, chewing lice, chalcid wasps, gall midges,
|
97
|
-
most mite lineages).</p>\n</blockquote>\n<p>The cycle of collect data, test
|
98
|
-
and refine model, collect more data, rinse and repeat that happens with iNaturalist
|
99
|
-
creates a feedback loop. It’s not clear that a similar cycle exists for DNA
|
100
|
-
barcoding.</p>\n<blockquote>\n<p>Written with <a href=\"https://stackedit.io/\">StackEdit</a>.</p>\n</blockquote>","tags":[],"language":"en","references":[]},{"id":"https://doi.org/10.59350/d3dc0-7an69","uuid":"545c177f-cea5-4b79-b554-3ccae9c789d7","url":"https://iphylo.blogspot.com/2021/10/reflections-on-macroscope-tool-for-21st.html","title":"Reflections
|
52
|
+
Attribution 4.0 International license</a>.","language":"en","favicon":null,"feed_url":"https://iphylo.blogspot.com/feeds/posts/default","current_feed_url":null,"feed_format":"application/atom+xml","home_page_url":"https://iphylo.blogspot.com/","indexed_at":"2023-02-06","modified_at":"2023-06-17T15:38:20+00:00","license":"https://creativecommons.org/licenses/by/4.0/legalcode","generator":"Blogger
|
53
|
+
7.00","category":"Natural Sciences","backlog":true,"prefix":"10.59350","expired":null,"items":[{"id":"37538c38-66e6-4ac4-ab5c-679684622ade","doi":"https://doi.org/10.59350/2b1j9-qmw12","url":"https://iphylo.blogspot.com/2022/05/round-trip-from-identifiers-to.html","title":"Round
|
54
|
+
trip from identifiers to citations and back again","summary":"Note to self
|
55
|
+
(basically rewriting last year''s Finding citations of specimens). Bibliographic
|
56
|
+
data supports going from identifier to citation string and back again, so
|
57
|
+
we can do a \"round trip.\" 1. Given a DOI we can get structured data with
|
58
|
+
a simple HTTP fetch, then use a tool such as citation.js to convert that data
|
59
|
+
into a human-readable string in a variety of formats. Identifier ⟶ Structured
|
60
|
+
data ⟶ Human readable string 10.7717/peerj-cs.214 HTTP with...","published_at":1653669240,"updated_at":1653669259,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
61
|
+
Page"}],"image":null,"tags":["citation","GBIF","material examined","specimen
|
62
|
+
codes"],"language":"en","reference":[]},{"id":"545c177f-cea5-4b79-b554-3ccae9c789d7","doi":"https://doi.org/10.59350/d3dc0-7an69","url":"https://iphylo.blogspot.com/2021/10/reflections-on-macroscope-tool-for-21st.html","title":"Reflections
|
101
63
|
on \"The Macroscope\" - a tool for the 21st Century?","summary":"This is a
|
102
64
|
guest post by Tony Rees. It would be difficult to encounter a scientist, or
|
103
65
|
anyone interested in science, who is not familiar with the microscope, a tool
|
@@ -105,171 +67,95 @@ http_interactions:
|
|
105
67
|
by the unaided eye, or to reveal otherwise invisible fine detail in larger
|
106
68
|
objects. A select few with a particular interest in microscopy may also have
|
107
69
|
encountered the Wild-Leica \"Macroscope\", a specialised type of benchtop
|
108
|
-
microscope optimised for...","
|
109
|
-
Page"}],"image":null,"
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
is
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
2022 we will use machine-learning algorithms and software to help us organize
|
198
|
-
information about the physical world, helping bring the vast and complex data
|
199
|
-
gathered by billions of devices within the range of our vision and understanding.
|
200
|
-
We call this a \"macroscope\" – but unlike the microscope to see the very
|
201
|
-
small, or the telescope that can see far away, it is a system of software
|
202
|
-
and algorithms to bring all of Earth''s complex data together to analyze it
|
203
|
-
by space and time for meaning.\" (IBM)</blockquote>\n\n<blockquote>As the
|
204
|
-
Earth becomes increasingly instrumented with low-cost, high-bandwidth sensors,
|
205
|
-
we will gain a better understanding of our environment via a virtual, distributed
|
206
|
-
whole-Earth \"macroscope\"... Massive-scale data analytics will enable real-time
|
207
|
-
tracking of disease and targeted responses to potential pandemics. Our virtual
|
208
|
-
\"macroscope\" can now be used on ourselves, as well as on our planet.\" (Microsoft)
|
209
|
-
(references available via the Wikipedia article cited above).</blockquote>\n\n<p>Whether
|
210
|
-
or not the analytical capabilities described here are viewed as being an integral
|
211
|
-
part of the \"macroscope\" concept, or are maybe an add-on, is ultimately
|
212
|
-
a question of semantics and perhaps, personal opinion. Continuing the Census
|
213
|
-
of Marine Life/OBIS example, OBIS offers some (arguably rather basic) visualization
|
214
|
-
and summary tools, but also makes its data available for download to users
|
215
|
-
wishing to analyse it further according to their own particular interests;
|
216
|
-
using OBIS data in this manner, Mark Costello et al. in 2017 were able to
|
217
|
-
demarcate a finite number of data-supported marine biogeographic realms for
|
218
|
-
the first time (Costello et al. 2017: Nature Communications. 8: 1057. <a href=\"https://doi.org/10.1038/s41467-017-01121-2\">doi:10.1038/s41467-017-01121-2</a>),
|
219
|
-
a project which I was able to assist in a small way in an advisory capacity.
|
220
|
-
In a case such as this, perhaps the final function of the macroscope, namely
|
221
|
-
data visualization and analysis, was outsourced to the authors'' own research
|
222
|
-
institution. Similarly at an earlier phase, \"data aggregation\" can also
|
223
|
-
be virtual rather than actual, i.e. avoiding using a single physical system
|
224
|
-
to hold all the data, enabled by open web mapping standards WMS (web map service)
|
225
|
-
and WFS (web feature service) to access a set of distributed data stores,
|
226
|
-
e.g. as implemented on the portal for the <a href=\"https://portal.aodn.org.au/\">Australian
|
227
|
-
Ocean Data Network</a>.</p>\n\n<p>So, as we pass through the third decade
|
228
|
-
of the twenty first century, what developments await us in the \"macroscope\"
|
229
|
-
area\"? In the biodiversity space, one can reasonably presume that the existing
|
230
|
-
\"macroscopic\" data assembly projects such as OBIS and GBIF will continue,
|
231
|
-
and hopefully slowly fill current gaps in their coverage - although in the
|
232
|
-
marine area, strategic new data collection exercises may be required (Census
|
233
|
-
2020, or 2025, anyone?), while (again hopefully), the Catalogue of Life will
|
234
|
-
continue its progress towards a \"complete\" species inventory for the biosphere.
|
235
|
-
The Landsat project, with imagery dating back to 1972, continues with the
|
236
|
-
launch of its latest satellite Landsat 9 just this year (21 September 2021)
|
237
|
-
with a planned mission duration for the next 5 years, so the \"macroscope\"
|
238
|
-
functionality of that project seems set to continue for the medium term at
|
239
|
-
least. Meanwhile the ongoing development of sensor networks, both on land
|
240
|
-
and in the ocean, offers an exciting new method of \"instrumenting the earth\"
|
241
|
-
to obtain much more real time data than has ever been available in the past,
|
242
|
-
offering scope for many more, use case-specific \"macroscopes\" to be constructed
|
243
|
-
that can fuse (e.g.) satellite imagery with much more that is happening at
|
244
|
-
a local level.</p>\n\n<p>So, the \"macroscope\" concept appears to be alive
|
245
|
-
and well, even though the nomenclature can change from time to time (IBM''s
|
246
|
-
\"Macroscope\", foreshadowed in 2017, became the \"IBM Pairs Geoscope\" on
|
247
|
-
implementation, and is now simply the \"Geospatial Analytics component within
|
248
|
-
the IBM Environmental Intelligence Suite\" according to available IBM publicity
|
249
|
-
materials). In reality this illustrates a new dichotomy: even if \"everyone\"
|
250
|
-
in principle has access to huge quantities of publicly available data, maybe
|
251
|
-
only a few well funded entities now have the computational ability to make
|
252
|
-
sense of it, and can charge clients a good fee for their services...</p>\n\n<p>I
|
253
|
-
present this account partly to give a brief picture of \"macroscope\" concepts
|
254
|
-
today and in the past, for those who may be interested, and partly to present
|
255
|
-
a few personal views which would be out of scope in a \"neutral point of view\"
|
256
|
-
article such as is required on Wikipedia; also to see if readers of this blog
|
257
|
-
would like to contribute further to discussion of any of the concepts traversed
|
258
|
-
herein.</p>","tags":["guest post","macroscope"],"language":"en","references":[]},{"id":"https://doi.org/10.59350/gf1dw-n1v47","uuid":"a41163e0-9c9a-41e0-a141-f772663f2f32","url":"https://iphylo.blogspot.com/2023/03/dugald-stuart-page-1936-2022.html","title":"Dugald
|
259
|
-
Stuart Page 1936-2022","summary":"My dad died last weekend. Below is a notice
|
260
|
-
in today''s New Zealand Herald. I''m in New Zealand for his funeral. Don''t
|
261
|
-
really have the words for this right now.","date_published":"2023-03-14T03:00:00Z","date_modified":"2023-03-22T07:25:56Z","date_indexed":"1909-06-16T10:41:55+00:00","authors":[{"url":null,"name":"Roderic
|
262
|
-
Page"}],"image":null,"content_html":"<div class=\"separator\" style=\"clear:
|
263
|
-
both;\"><a href=\"https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjZweukxntl7R5jnk3knVFVrqZ5RxC7mPZBV4gKeDIglbFzs2O442nbxqs8t8jV2tLqCU24K6gS32jW-Pe8q3O_5JR1Ms3qW1aQAZ877cKkFfcUydqUba9HsgNlX-zS9Ne92eLxRGS8F-lStTecJw2oalp3u58Yoc0oM7CUin5LKPeFIJ7Rzg/s3454/_DSC5106.jpg\"
|
264
|
-
style=\"display: block; padding: 1em 0; text-align: center; \"><img alt=\"\"
|
265
|
-
border=\"0\" width=\"400\" data-original-height=\"2582\" data-original-width=\"3454\"
|
266
|
-
src=\"https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjZweukxntl7R5jnk3knVFVrqZ5RxC7mPZBV4gKeDIglbFzs2O442nbxqs8t8jV2tLqCU24K6gS32jW-Pe8q3O_5JR1Ms3qW1aQAZ877cKkFfcUydqUba9HsgNlX-zS9Ne92eLxRGS8F-lStTecJw2oalp3u58Yoc0oM7CUin5LKPeFIJ7Rzg/s400/_DSC5106.jpg\"/></a></div>\n\nMy
|
267
|
-
dad died last weekend. Below is a notice in today''s New Zealand Herald. I''m
|
268
|
-
in New Zealand for his funeral. Don''t really have the words for this right
|
269
|
-
now.\n\n<div class=\"separator\" style=\"clear: both;\"><a href=\"https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEiRUTOFF1VWHCl8dg3FQuaWy5LM7aX8IivdRpTtzgrdQTEymsA5bLTZE3cSQf1WQIP3XrC46JsLScP8BxTK9C5a-B1i51yg8WGSJD0heJVaoDLnerv0lD1o3qloDjqEuuyfX4wagHB5YYBmjWnGeVQvyYVngvDDf9eM6pmMtZ7x94Y4jSVrug/s3640/IMG_2870.jpeg\"
|
270
|
-
style=\"display: block; padding: 1em 0; text-align: center; \"><img alt=\"\"
|
271
|
-
border=\"0\" height=\"320\" data-original-height=\"3640\" data-original-width=\"1391\"
|
272
|
-
src=\"https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEiRUTOFF1VWHCl8dg3FQuaWy5LM7aX8IivdRpTtzgrdQTEymsA5bLTZE3cSQf1WQIP3XrC46JsLScP8BxTK9C5a-B1i51yg8WGSJD0heJVaoDLnerv0lD1o3qloDjqEuuyfX4wagHB5YYBmjWnGeVQvyYVngvDDf9eM6pmMtZ7x94Y4jSVrug/s320/IMG_2870.jpeg\"/></a></div>","tags":[],"language":"en","references":[]},{"id":"https://doi.org/10.59350/cbzgz-p8428","uuid":"a93134aa-8b33-4dc7-8cd4-76cdf64732f4","url":"https://iphylo.blogspot.com/2023/04/library-interfaces-knowledge-graphs-and.html","title":"Library
|
70
|
+
microscope optimised for...","published_at":1633610280,"updated_at":1633688782,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
71
|
+
Page"}],"image":null,"tags":["guest post","macroscope"],"language":"en","reference":[]},{"id":"f3629c86-06e0-42c0-844a-266b03a91ef1","doi":"https://doi.org/10.59350/37y2z-gre70","url":"https://iphylo.blogspot.com/2023/05/ten-years-and-million-links.html","title":"Ten
|
72
|
+
years and a million links","summary":"As trailed on a Twitter thread last
|
73
|
+
week I’ve been working on a manuscript describing the efforts to map taxonomic
|
74
|
+
names to their original descriptions in the taxonomic literature. Putting
|
75
|
+
together a manuscript on linking taxonomic names to the primary literature,
|
76
|
+
basically “um, what, exactly, have you been doing all these years?”. TL;DR
|
77
|
+
Across fungi, plants, and animals approx 1.3 million names have been linked
|
78
|
+
to a persistent identifier for a publication.— Roderic Page (@rdmpage) May
|
79
|
+
25,...","published_at":1685553960,"updated_at":1685554180,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
80
|
+
Page"}],"image":null,"tags":[],"language":"en","reference":[]},{"id":"6bed78ec-0029-4096-b1c3-48a55a9fdb3b","doi":"https://doi.org/10.59350/ws094-1w310","url":"https://iphylo.blogspot.com/2023/04/chatgpt-of-course.html","title":"ChatGPT,
|
81
|
+
of course","summary":"I haven’t blogged for a while, work and other reasons
|
82
|
+
have meant I’ve not had much time to think, and mostly I blog to help me think.
|
83
|
+
ChatGPT is obviously a big thing at the moment, and once we get past the moral
|
84
|
+
panic (“students can pass exams using AI!”) there are a lot of interesting
|
85
|
+
possibilities to explore. Inspired by essays such as How Q&A systems based
|
86
|
+
on large language models (eg GPT4) will change things if they become the dominant
|
87
|
+
search paradigm — 9 implications for libraries...","published_at":1680526320,"updated_at":1680526621,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
88
|
+
Page"}],"image":null,"tags":[],"language":"en","reference":[]},{"id":"7d814863-43b5-4faf-a475-da8de5efd3ef","doi":"https://doi.org/10.59350/m7gb7-d7c49","url":"https://iphylo.blogspot.com/2022/02/duplicate-dois-again.html","title":"Duplicate
|
89
|
+
DOIs (again)","summary":"This blog post provides some background to a recent
|
90
|
+
tweet where I expressed my frustration about the duplication of DOIs for the
|
91
|
+
same article. I''m going to document the details here. The DOI that alerted
|
92
|
+
me to this problem is https://doi.org/10.2307/2436688 which is for the article
|
93
|
+
Snyder, W. C., & Hansen, H. N. (1940). THE SPECIES CONCEPT IN FUSARIUM. American
|
94
|
+
Journal of Botany, 27(2), 64–67. This article is hosted by JSTOR at https://www.jstor.org/stable/2436688
|
95
|
+
which displays the DOI...","published_at":1644332760,"updated_at":1644332778,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
96
|
+
Page"}],"image":null,"tags":["CrossRef","DOI","duplicates"],"language":"en","reference":[]},{"id":"23fa1dd8-5c6b-4aa9-9cad-c6f6b14ae9e0","doi":"https://doi.org/10.59350/jzvs4-r9559","url":"https://iphylo.blogspot.com/2021/08/json-ld-in-wild-examples-of-how.html","title":"JSON-LD
|
97
|
+
in the wild: examples of how structured data is represented on the web","summary":"I''ve
|
98
|
+
created a GitHub repository so that I can keep track of the examples of JSON-LD
|
99
|
+
that I''ve seen being actively used, for example embedded in web sites, or
|
100
|
+
accessed using an API. The repository is https://github.com/rdmpage/wild-json-ld.
|
101
|
+
The list is by no means exhaustive, I hope to add more examples as I come
|
102
|
+
across them. One reason for doing this is to learn what others are doing.
|
103
|
+
For example, after looking at SciGraph''s JSON-LD I now see how an ordered
|
104
|
+
list can be modelled in RDF in...","published_at":1630070400,"updated_at":1630070987,"indexed_at":1688982503,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
105
|
+
Page"}],"image":null,"tags":["JSON-LD","RDF"],"language":"en","reference":[]},{"id":"5891c709-d139-440f-bacb-06244424587a","doi":"https://doi.org/10.59350/pmhat-5ky65","url":"https://iphylo.blogspot.com/2021/10/problems-with-plazi-parsing-how.html","title":"Problems
|
106
|
+
with Plazi parsing: how reliable are automated methods for extracting specimens
|
107
|
+
from the literature?","summary":"The Plazi project has become one of the major
|
108
|
+
contributors to GBIF with some 36,000 datasets yielding some 500,000 occurrences
|
109
|
+
(see Plazi''s GBIF page for details). These occurrences are extracted from
|
110
|
+
taxonomic publication using automated methods. New data is published almost
|
111
|
+
daily (see latest treatments). The map below shows the geographic distribution
|
112
|
+
of material citations provided to GBIF by Plazi, which gives you a sense of
|
113
|
+
the size of the dataset. By any metric Plazi represents a...","published_at":1635160200,"updated_at":1635437298,"indexed_at":1688982503,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
114
|
+
Page"}],"image":null,"tags":["data quality","parsing","Plazi","specimen","text
|
115
|
+
mining"],"language":"en","reference":[]},{"id":"3cb94422-5506-4e24-a41c-a250bb521ee0","doi":"https://doi.org/10.59350/c79vq-7rr11","url":"https://iphylo.blogspot.com/2021/12/graphql-for-wikidata-wikicite.html","title":"GraphQL
|
116
|
+
for WikiData (WikiCite)","summary":"I''ve released a very crude GraphQL endpoint
|
117
|
+
for WikiData. More precisely, the endpoint is for a subset of the entities
|
118
|
+
that are of interest to WikiCite, such as scholarly articles, people, and
|
119
|
+
journals. There is a crude demo at https://wikicite-graphql.herokuapp.com.
|
120
|
+
The endpoint itself is at https://wikicite-graphql.herokuapp.com/gql.php.
|
121
|
+
There are various ways to interact with the endpoint, personally I like the
|
122
|
+
Altair GraphQL Client by Samuel Imolorhe. As I''ve mentioned earlier it''s
|
123
|
+
taken...","published_at":1640006160,"updated_at":1640006405,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
124
|
+
Page"}],"image":null,"tags":["GraphQL","SPARQL","WikiCite","Wikidata"],"language":"en","reference":[]},{"id":"62e7b438-67a3-44ac-a66d-3f5c278c949e","doi":"https://doi.org/10.59350/3s376-6bm21","url":"https://iphylo.blogspot.com/2022/02/deduplicating-bibliographic-data.html","title":"Deduplicating
|
125
|
+
bibliographic data","summary":"There are several instances where I have a
|
126
|
+
collection of references that I want to deduplicate and merge. For example,
|
127
|
+
in Zootaxa has no impact factor I describe a dataset of the literature cited
|
128
|
+
by articles in the journal Zootaxa. This data is available on Figshare (https://doi.org/10.6084/m9.figshare.c.5054372.v4),
|
129
|
+
as is the equivalent dataset for Phytotaxa (https://doi.org/10.6084/m9.figshare.c.5525901.v1).
|
130
|
+
Given that the same articles may be cited many times, these datasets have
|
131
|
+
lots of...","published_at":1643900940,"updated_at":1643901089,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
132
|
+
Page"}],"image":null,"tags":["data cleaning","deduplication","Phytotaxa","Wikispecies","Zootaxa"],"language":"en","reference":[]},{"id":"d33d4f49-b281-4997-9eb9-dbad1e52d9bd","doi":"https://doi.org/10.59350/92rdb-5fe58","url":"https://iphylo.blogspot.com/2022/09/local-global-identifiers-for.html","title":"Local
|
133
|
+
global identifiers for decentralised wikis","summary":"I''ve been thinking
|
134
|
+
a bit about how one could use a Markdown wiki-like tool such as Obsidian to
|
135
|
+
work with taxonomic data (see earlier posts Obsidian, markdown, and taxonomic
|
136
|
+
trees and Personal knowledge graphs: Obsidian, Roam, Wikidata, and Xanadu).
|
137
|
+
One \"gotcha\" would be how to name pages. If we treat the database as entirely
|
138
|
+
local, then the page names don''t matter, but what if we envisage sharing
|
139
|
+
the database, or merging it with others (for example, if we divided a taxon
|
140
|
+
up into chunks, and...","published_at":1662653340,"updated_at":1662657862,"indexed_at":1688982864,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
141
|
+
Page"}],"image":null,"tags":["citekey","identfiiers","markdown","obsidian","Roger
|
142
|
+
Hyam"],"language":"en","reference":[]},{"id":"6a4d5c44-f4a9-4d40-a32c-a4d5e512c55a","doi":"https://doi.org/10.59350/rfxj3-x6739","url":"https://iphylo.blogspot.com/2022/05/thoughts-on-treebase-dying.html","title":"Thoughts
|
143
|
+
on TreeBASE dying(?)","summary":"@rvosa is Naturalis no longer hosting Treebase?
|
144
|
+
https://t.co/MBRgcxaBmR— Hilmar Lapp (@hlapp) May 10, 2022 So it looks like
|
145
|
+
TreeBASE is in trouble, it''s legacy Java code a victim of security issues.
|
146
|
+
Perhaps this is a chance to rethink TreeBASE, assuming that a repository of
|
147
|
+
published phylogenies is still considered a worthwhile thing to have (and
|
148
|
+
I think that question is open). Here''s what I think could be done. The data
|
149
|
+
(individual studies with trees and data) are packaged into...","published_at":1652287980,"updated_at":1652350205,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
150
|
+
Page"}],"image":null,"tags":["phylogeny","TreeBASE"],"language":"en","reference":[]},{"id":"3e1278f6-e7c0-43e1-bb54-6829e1344c0d","doi":"https://doi.org/10.59350/btdk4-42879","url":"https://iphylo.blogspot.com/2022/09/the-ideal-taxonomic-journal.html","title":"The
|
151
|
+
ideal taxonomic journal","summary":"This is just some random notes on an “ideal”
|
152
|
+
taxonomic journal, inspired in part by some recent discussions on “turbo-taxonomy”
|
153
|
+
(e.g., https://doi.org/10.3897/zookeys.1087.76720 and https://doi.org/10.1186/1742-9994-10-15),
|
154
|
+
and also examples such as the Australian Journal of Taxonomy https://doi.org/10.54102/ajt.qxi3r
|
155
|
+
which seems well-intentioned but limited. XML One approach is to have highly
|
156
|
+
structured text that embeds detailed markup, and ideally a tool that generates
|
157
|
+
markup in XML. This is...","published_at":1664460000,"updated_at":1664460001,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
158
|
+
Page"}],"image":null,"tags":[],"language":"en","reference":[]},{"id":"a93134aa-8b33-4dc7-8cd4-76cdf64732f4","doi":"https://doi.org/10.59350/cbzgz-p8428","url":"https://iphylo.blogspot.com/2023/04/library-interfaces-knowledge-graphs-and.html","title":"Library
|
273
159
|
interfaces, knowledge graphs, and Miller columns","summary":"Some quick notes
|
274
160
|
on interface ideas for digital libraries and/or knowledge graphs. Recently
|
275
161
|
there’s been something of an explosion in bibliographic tools to explore the
|
@@ -277,308 +163,114 @@ http_interactions:
|
|
277
163
|
papers _scite which uses AI to do sentiment analysis on citations (does paper
|
278
164
|
A cite paper B favourably or not?) ResearchRabbit which uses lists, networks,
|
279
165
|
and timelines to discover related research Scispace which navigates connections
|
280
|
-
between...","
|
281
|
-
Page"}],"image":null,"
|
282
|
-
for digital libraries and/or knowledge graphs.</p>\n<p>Recently there’s been
|
283
|
-
something of an explosion in bibliographic tools to explore the literature.
|
284
|
-
Examples include:</p>\n<ul>\n<li><a href=\"https://elicit.org\">Elicit</a>
|
285
|
-
which uses AI to search for and summarise papers</li>\n<li><a href=\"https://scite.ai\">_scite</a>
|
286
|
-
which uses AI to do sentiment analysis on citations (does paper A cite paper
|
287
|
-
B favourably or not?)</li>\n<li><a href=\"https://www.researchrabbit.ai\">ResearchRabbit</a>
|
288
|
-
which uses lists, networks, and timelines to discover related research</li>\n<li><a
|
289
|
-
href=\"https://typeset.io\">Scispace</a> which navigates connections between
|
290
|
-
papers, authors, topics, etc., and provides AI summaries.</li>\n</ul>\n<p>As
|
291
|
-
an aside, I think these (and similar tools) are a great example of how bibliographic
|
292
|
-
data such as abstracts, the citation graph and - to a lesser extent - full
|
293
|
-
text - have become commodities. That is, what was once proprietary information
|
294
|
-
is now free to anyone, which in turns means a whole ecosystem of new tools
|
295
|
-
can emerge. If I was clever I’d be building a <a href=\"https://en.wikipedia.org/wiki/Wardley_map\">Wardley
|
296
|
-
map</a> to explore this. Note that a decade or so ago reference managers like
|
297
|
-
<a href=\"https://www.zotero.org\">Zotero</a> were made possible by publishers
|
298
|
-
exposing basic bibliographic data on their articles. As we move to <a href=\"https://i4oc.org\">open
|
299
|
-
citations</a> we are seeing the next generation of tools.</p>\n<p>Back to
|
300
|
-
my main topic. As usual, rather than focus on what these tools do I’m more
|
301
|
-
interested in how they <strong>look</strong>. I have history here, when the
|
302
|
-
iPad came out I was intrigued by the possibilities it offered for displaying
|
303
|
-
academic articles, as discussed <a href=\"https://iphylo.blogspot.com/2010/08/viewing-scientific-articles-on-ipad.html\">here</a>,
|
304
|
-
<a href=\"https://iphylo.blogspot.com/2010/09/viewing-scientific-articles-on-ipad.html\">here</a>,
|
305
|
-
<a href=\"https://iphylo.blogspot.com/2010/08/viewing-scientific-articles-on-ipad_24.html\">here</a>,
|
306
|
-
<a href=\"https://iphylo.blogspot.com/2010/08/viewing-scientific-articles-on-ipad_3052.html\">here</a>,
|
307
|
-
and <a href=\"https://iphylo.blogspot.com/2010/08/viewing-scientific-articles-on-ipad_31.html\">here</a>.
|
308
|
-
ResearchRabbit looks like this:</p>\n<div style=\"padding:86.91% 0 0 0;position:relative;\"><iframe
|
309
|
-
src=\"https://player.vimeo.com/video/820871442?h=23b05b0dae&badge=0&autopause=0&player_id=0&app_id=58479\"
|
310
|
-
frameborder=\"0\" allow=\"autoplay; fullscreen; picture-in-picture\" allowfullscreen
|
311
|
-
style=\"position:absolute;top:0;left:0;width:100%;height:100%;\" title=\"ResearchRabbit\"></iframe></div><script
|
312
|
-
src=\"https://player.vimeo.com/api/player.js\"></script>\n<p>Scispace’s <a
|
313
|
-
href=\"https://typeset.io/explore/journals/parassitologia-1ieodjwe\">“trace”
|
314
|
-
view</a> looks like this:</p>\n<div style=\"padding:84.55% 0 0 0;position:relative;\"><iframe
|
315
|
-
src=\"https://player.vimeo.com/video/820871348?h=2db7b661ef&badge=0&autopause=0&player_id=0&app_id=58479\"
|
316
|
-
frameborder=\"0\" allow=\"autoplay; fullscreen; picture-in-picture\" allowfullscreen
|
317
|
-
style=\"position:absolute;top:0;left:0;width:100%;height:100%;\" title=\"Scispace
|
318
|
-
screencast\"></iframe></div><script src=\"https://player.vimeo.com/api/player.js\"></script>\n<p>What
|
319
|
-
is interesting about both is that they display content from left to right
|
320
|
-
in vertical columns, rather than the more common horizontal rows. This sort
|
321
|
-
of display is sometimes called <a href=\"https://en.wikipedia.org/wiki/Miller_columns\">Miller
|
322
|
-
columns</a> or a <a href=\"https://web.archive.org/web/20210726134921/http://designinginterfaces.com/firstedition/index.php?page=Cascading_Lists\">cascading
|
323
|
-
list</a>.</p>\n\n<div class=\"separator\" style=\"clear: both;\"><a href=\"https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjBPnV9fRBcvm-BX5PjzfG5Cff9PerCLsTW8d5ZbsL6b41t7ypD7ovmcgfTf3b4b34mbq8NM4sfwOHkgEq32FLYnD497RFQD4HQmYmh5Eveu1zWdDVyKyDtPyE98QoxTaOEnLA5kK0fnl3dOOEgUvtVKlTZ8bt1gj2v_8tDRWl9f50ybyei3A/s1024/GNUstep-liveCD.png\"
|
324
|
-
style=\"display: block; padding: 1em 0; text-align: center; \"><img alt=\"\"
|
325
|
-
border=\"0\" width=\"400\" data-original-height=\"768\" data-original-width=\"1024\"
|
326
|
-
src=\"https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjBPnV9fRBcvm-BX5PjzfG5Cff9PerCLsTW8d5ZbsL6b41t7ypD7ovmcgfTf3b4b34mbq8NM4sfwOHkgEq32FLYnD497RFQD4HQmYmh5Eveu1zWdDVyKyDtPyE98QoxTaOEnLA5kK0fnl3dOOEgUvtVKlTZ8bt1gj2v_8tDRWl9f50ybyei3A/s400/GNUstep-liveCD.png\"/></a></div>\n\n<p>By
|
327
|
-
Gürkan Sengün (talk) - Own work, Public Domain, <a href=\"https://commons.wikimedia.org/w/index.php?curid=594715\">https://commons.wikimedia.org/w/index.php?curid=594715</a></p>\n<p>I’ve
|
328
|
-
always found displaying a knowledge graph to be a challenge, as discussed
|
329
|
-
<a href=\"https://iphylo.blogspot.com/2019/07/notes-on-collections-knowledge-graphs.html\">elsewhere
|
330
|
-
on this blog</a> and in my paper on <a href=\"https://peerj.com/articles/6739/#p-29\">Ozymandias</a>.
|
331
|
-
Miller columns enable one to drill down in increasing depth, but it doesn’t
|
332
|
-
need to be a tree, it can be a path within a network. What I like about ResearchRabbit
|
333
|
-
and the original Scispace interface is that they present the current item
|
334
|
-
together with a list of possible connections (e.g., authors, citations) that
|
335
|
-
you can drill down on. Clicking on these will result in a new column being
|
336
|
-
appended to the right, with a view (typically a list) of the next candidates
|
337
|
-
to visit. In graph terms, these are adjacent nodes to the original item. The
|
338
|
-
clickable badges on each item can be thought of as sets of edges that have
|
339
|
-
the same label (e.g., “authored by”, “cites”, “funded”, “is about”, etc.).
|
340
|
-
Each of these nodes itself becomes a starting point for further exploration.
|
341
|
-
Note that the original starting point isn’t privileged, other than being the
|
342
|
-
starting point. That is, each time we drill down we are seeing the same type
|
343
|
-
of information displayed in the same way. Note also that the navigation can
|
344
|
-
be though of as a <strong>card</strong> for a node, with <strong>buttons</strong>
|
345
|
-
grouping the adjacent nodes. When we click on an individual button, it expands
|
346
|
-
into a <strong>list</strong> in the next column. This can be thought of as
|
347
|
-
a preview for each adjacent node. Clicking on an element in the list generates
|
348
|
-
a new card (we are viewing a single node) and we get another set of buttons
|
349
|
-
corresponding to the adjacent nodes.</p>\n<p>One important behaviour in a
|
350
|
-
Miller column interface is that the current path can be pruned at any point.
|
351
|
-
If we go back (i.e., scroll to the left) and click on another tab on an item,
|
352
|
-
everything downstream of that item (i.e., to the right) gets deleted and replaced
|
353
|
-
by a new set of nodes. This could make retrieving a particular history of
|
354
|
-
browsing a bit tricky, but encourages exploration. Both Scispace and ResearchRabbit have
|
355
|
-
the ability to add items to a collection, so you can keep track of things
|
356
|
-
you discover.</p>\n<p>Lots of food for thought, I’m assuming that there is
|
357
|
-
some user interface/experience research on Miller columns. One thing to remember
|
358
|
-
is that Miller columns are most often associated with trees, but in this case
|
359
|
-
we are exploring a network. That means that potentially there is no limit
|
360
|
-
to the number of columns being generated as we wander through the graph. It
|
361
|
-
will be interesting to think about what the average depth is likely to be,
|
362
|
-
in other words, how deep down the rabbit hole will be go?</p>\n\n<h3>Update</h3>\n<p>Should
|
363
|
-
add link to David Regev''s explorations of <a href=\"https://medium.com/david-regev-on-ux/flow-browser-b730daf0f717\">Flow
|
364
|
-
Browser</a>.\n\n<blockquote>\n<p>Written with <a href=\"https://stackedit.io/\">StackEdit</a>.</p>\n</blockquote>","tags":["cards","flow","Knowledge
|
365
|
-
Graph","Miller column","RabbitResearch"],"language":"en","references":[]},{"id":"https://doi.org/10.59350/t6fb9-4fn44","uuid":"8bc3fea6-cb86-4344-8dad-f312fbf58041","url":"https://iphylo.blogspot.com/2021/12/the-business-of-extracting-knowledge.html","title":"The
|
366
|
-
Business of Extracting Knowledge from Academic Publications","summary":"Markus
|
367
|
-
Strasser (@mkstra write a fascinating article entitled \"The Business of Extracting
|
368
|
-
Knowledge from Academic Publications\". I spent months working on domain-specific
|
369
|
-
search engines and knowledge discovery apps for biomedicine and eventually
|
370
|
-
figured that synthesizing \"insights\" or building knowledge graphs by machine-reading
|
371
|
-
the academic literature (papers) is *barely useful* :https://t.co/eciOg30Odc—
|
372
|
-
Markus Strasser (@mkstra) December 7, 2021 His TL;DR: TL;DR: I worked on biomedical...","date_published":"2021-12-11T00:01:00Z","date_modified":"2021-12-11T00:01:21Z","date_indexed":"1909-06-16T11:32:09+00:00","authors":[{"url":null,"name":"Roderic
|
373
|
-
Page"}],"image":null,"content_html":"<p>Markus Strasser (<a href=\"https://twitter.com/mkstra\">@mkstra</a>
|
374
|
-
write a fascinating article entitled <a href=\"https://markusstrasser.org/extracting-knowledge-from-literature/\">\"The
|
375
|
-
Business of Extracting Knowledge from Academic Publications\"</a>.</p>\n\n<blockquote
|
376
|
-
class=\"twitter-tweet\"><p lang=\"en\" dir=\"ltr\">I spent months working
|
377
|
-
on domain-specific search engines and knowledge discovery apps for biomedicine
|
378
|
-
and eventually figured that synthesizing "insights" or building
|
379
|
-
knowledge graphs by machine-reading the academic literature (papers) is *barely
|
380
|
-
useful* :<a href=\"https://t.co/eciOg30Odc\">https://t.co/eciOg30Odc</a></p>—
|
381
|
-
Markus Strasser (@mkstra) <a href=\"https://twitter.com/mkstra/status/1468334482113523716?ref_src=twsrc%5Etfw\">December
|
382
|
-
7, 2021</a></blockquote> <script async src=\"https://platform.twitter.com/widgets.js\"
|
383
|
-
charset=\"utf-8\"></script>\n\n<p>His TL;DR:</p>\n\n<p><blockquote>\nTL;DR:
|
384
|
-
I worked on biomedical literature search, discovery and recommender web applications
|
385
|
-
for many months and concluded that extracting, structuring or synthesizing
|
386
|
-
\"insights\" from academic publications (papers) or building knowledge bases
|
387
|
-
from a domain corpus of literature has negligible value in industry.</p>\n\n<p>Close
|
388
|
-
to nothing of what makes science actually work is published as text on the
|
389
|
-
web.\n</blockquote></p>\n\n<p>After recounting the many problems of knowledge
|
390
|
-
extraction - including a swipe at nanopubs which \"are ... dead in my view
|
391
|
-
(without admitting it)\" - he concludes:</p>\n\n<p><blockquote>\nI’ve been
|
392
|
-
flirting with this entire cluster of ideas including open source web annotation,
|
393
|
-
semantic search and semantic web, public knowledge graphs, nano-publications,
|
394
|
-
knowledge maps, interoperable protocols and structured data, serendipitous
|
395
|
-
discovery apps, knowledge organization, communal sense making and academic
|
396
|
-
literature/publishing toolchains for a few years on and off ... nothing of
|
397
|
-
it will go anywhere.</p>\n\n<p>Don’t take that as a challenge. Take it as
|
398
|
-
a red flag and run. Run towards better problems.\n</blockquote></p>\n\n<p>Well
|
399
|
-
worth a read, and much food for thought.</p>","tags":["ai","business model","text
|
400
|
-
mining"],"language":"en","references":[]},{"id":"https://doi.org/10.59350/463yw-pbj26","uuid":"dc829ab3-f0f1-40a4-b16d-a36dc0e34166","url":"https://iphylo.blogspot.com/2022/12/david-remsen.html","title":"David
|
166
|
+
between...","published_at":1682427660,"updated_at":1682607068,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
167
|
+
Page"}],"image":null,"tags":["cards","flow","Knowledge Graph","Miller column","RabbitResearch"],"language":"en","reference":[]},{"id":"dc829ab3-f0f1-40a4-b16d-a36dc0e34166","doi":"https://doi.org/10.59350/463yw-pbj26","url":"https://iphylo.blogspot.com/2022/12/david-remsen.html","title":"David
|
401
168
|
Remsen","summary":"I heard yesterday from Martin Kalfatovic (BHL) that David
|
402
169
|
Remsen has died. Very sad news. It''s starting to feel like iPhylo might end
|
403
170
|
up being a list of obituaries of people working on biodiversity informatics
|
404
171
|
(e.g., Scott Federhen). I spent several happy visits at MBL at Woods Hole
|
405
172
|
talking to Dave at the height of the uBio project, which really kickstarted
|
406
173
|
large scale indexing of taxonomic names, and the use of taxonomic name finding
|
407
|
-
tools to index the literature. His work on uBio with David...","
|
408
|
-
Page"}],"image":null,"
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
to
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
is to
|
479
|
-
|
480
|
-
|
481
|
-
as
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
we think are the same. Another way to think of this is that we are getting
|
511
|
-
the components of a graph where each node is a reference and pair of references
|
512
|
-
that match are connected by an edge.</p>\n\n<p>In the code I''m using I write
|
513
|
-
this graph in <a href=\"https://en.wikipedia.org/wiki/Trivial_Graph_Format\">Trivial
|
514
|
-
Graph Format</a> (TGF) which can be visualised using a tools such as <a href=\"https://www.yworks.com/products/yed\">yEd</a>.</p>\n\n<h2>Merging</h2>\n\n<p>Now
|
515
|
-
that we have a graph representing the sets of references that we think are
|
516
|
-
the same we need to merge them. This is where things get interesting as the
|
517
|
-
references are similar (by definition) but may differ in some details. The
|
518
|
-
paper below describes a simple Bayesian approach for merging records:</p>\n\n<blockquote>\nCouncill
|
519
|
-
IG, Li H, Zhuang Z, Debnath S, Bolelli L, Lee WC, Sivasubramaniam A, Giles
|
520
|
-
CL. 2006. Learning Metadata from the Evidence in an On-line Citation Matching
|
521
|
-
Scheme. In: Proceedings of the 6th ACM/IEEE-CS Joint Conference on Digital
|
522
|
-
Libraries. JCDL ’06. New York, NY, USA: ACM, 276–285. DOI: <a href=\"https://doi.org/10.1145/1141753.1141817\">10.1145/1141753.1141817</a>.\n</blockquote>\n\n<p>So
|
523
|
-
the next step is to read the graph with the clusters, generate the sets of
|
524
|
-
bibliographic references that correspond to each cluster, then use the method
|
525
|
-
described in Councill et al. to produce a single bibliographic record for
|
526
|
-
that cluster. These records could then be used to, say locate the corresponding
|
527
|
-
article in BHL, or populate Wikidata with missing references.</p>\n\n<p>Obviously
|
528
|
-
there is always the potential for errors, such as trying to merge references
|
529
|
-
that are not the same. As a quick and dirty check I flag as dubious any cluster
|
530
|
-
where the page numbers vary among members of the cluster. More sophisticated
|
531
|
-
checks are possible, especially if I go down the ML route (i.e., I would have
|
532
|
-
evidence for the probability that the same reference can disagree on some
|
533
|
-
aspects of metadata).</p>\n\n<h2>Summary</h2>\n\n<p>At this stage the code
|
534
|
-
is working well enough for me to play with and explore some example datasets.
|
535
|
-
The focus is on structured bibliographic metadata, but I may simplify things
|
536
|
-
and have a version that handles simple string matching, for example to cluster
|
537
|
-
together different abbreviations of the same journal name.</p>","tags":["data
|
538
|
-
cleaning","deduplication","Phytotaxa","Wikispecies","Zootaxa"],"language":"en","references":[]},{"id":"https://doi.org/10.59350/c79vq-7rr11","uuid":"3cb94422-5506-4e24-a41c-a250bb521ee0","url":"https://iphylo.blogspot.com/2021/12/graphql-for-wikidata-wikicite.html","title":"GraphQL
|
539
|
-
for WikiData (WikiCite)","summary":"I''ve released a very crude GraphQL endpoint
|
540
|
-
for WikiData. More precisely, the endpoint is for a subset of the entities
|
541
|
-
that are of interest to WikiCite, such as scholarly articles, people, and
|
542
|
-
journals. There is a crude demo at https://wikicite-graphql.herokuapp.com.
|
543
|
-
The endpoint itself is at https://wikicite-graphql.herokuapp.com/gql.php.
|
544
|
-
There are various ways to interact with the endpoint, personally I like the
|
545
|
-
Altair GraphQL Client by Samuel Imolorhe. As I''ve mentioned earlier it''s
|
546
|
-
taken...","date_published":"2021-12-20T13:16:00Z","date_modified":"2021-12-20T13:20:05Z","date_indexed":"1909-06-16T10:52:00+00:00","authors":[{"url":null,"name":"Roderic
|
547
|
-
Page"}],"image":null,"content_html":"<div class=\"separator\" style=\"clear:
|
548
|
-
both;\"><a href=\"https://blogger.googleusercontent.com/img/a/AVvXsEh7WW8TlfN2u3xe_E-sH5IK6AWYnAoWaKrP2b32UawUeqMpPlq6ZFk5BtJVZsMmCNh5j3QRsTj5H0Ee55RRGntnc9yjj_mNB8KHmH2dzocCgyLS2VFOxsBji6u4Ey6qxlDAnT-zrsBpnDcTchbhgt1x0Sf7RkmIMkS1y4-_3KCQian-SeIF-g=s1000\"
|
549
|
-
style=\"display: block; padding: 1em 0; text-align: center; clear: right;
|
550
|
-
float: right;\"><img alt=\"\" border=\"0\" width=\"128\" data-original-height=\"1000\"
|
551
|
-
data-original-width=\"1000\" src=\"https://blogger.googleusercontent.com/img/a/AVvXsEh7WW8TlfN2u3xe_E-sH5IK6AWYnAoWaKrP2b32UawUeqMpPlq6ZFk5BtJVZsMmCNh5j3QRsTj5H0Ee55RRGntnc9yjj_mNB8KHmH2dzocCgyLS2VFOxsBji6u4Ey6qxlDAnT-zrsBpnDcTchbhgt1x0Sf7RkmIMkS1y4-_3KCQian-SeIF-g=s200\"/></a></div><p>I''ve
|
552
|
-
released a very crude GraphQL endpoint for WikiData. More precisely, the endpoint
|
553
|
-
is for a subset of the entities that are of interest to WikiCite, such as
|
554
|
-
scholarly articles, people, and journals. There is a crude demo at <a href=\"https://wikicite-graphql.herokuapp.com\">https://wikicite-graphql.herokuapp.com</a>.
|
555
|
-
The endpoint itself is at <a href=\"https://wikicite-graphql.herokuapp.com/gql.php\">https://wikicite-graphql.herokuapp.com/gql.php</a>.
|
556
|
-
There are various ways to interact with the endpoint, personally I like the
|
557
|
-
<a href=\"https://altair.sirmuel.design\">Altair GraphQL Client</a> by <a
|
558
|
-
href=\"https://github.com/imolorhe\">Samuel Imolorhe</a>.</p>\n\n<p>As I''ve
|
559
|
-
<a href=\"https://iphylo.blogspot.com/2021/04/it-been-while.html\">mentioned
|
560
|
-
earlier</a> it''s taken me a while to see the point of GraphQL. But it is
|
561
|
-
clear it is gaining traction in the biodiversity world (see for example the
|
562
|
-
<a href=\"https://dev.gbif.org/hosted-portals.html\">GBIF Hosted Portals</a>)
|
563
|
-
so it''s worth exploring. My take on GraphQL is that it is a way to create
|
564
|
-
a self-describing API that someone developing a web site can use without them
|
565
|
-
having to bury themselves in the gory details of how data is internally modelled.
|
566
|
-
For example, WikiData''s query interface uses SPARQL, a powerful language
|
567
|
-
that has a steep learning curve (in part because of the administrative overhead
|
568
|
-
brought by RDF namespaces, etc.). In my previous SPARQL-based projects such
|
569
|
-
as <a href=\"https://ozymandias-demo.herokuapp.com\">Ozymandias</a> and <a
|
570
|
-
href=\"http://alec-demo.herokuapp.com\">ALEC</a> I have either returned SPARQL
|
571
|
-
results directly (Ozymandias) or formatted SPARQL results as <a href=\"https://schema.org/DataFeed\">schema.org
|
572
|
-
DataFeeds</a> (equivalent to RSS feeds) (ALEC). Both approaches work, but
|
573
|
-
they are project-specific and if anyone else tried to build based on these
|
574
|
-
projects they might struggle for figure out what was going on. I certainly
|
575
|
-
struggle, and I wrote them!</p>\n\n<p>So it seems worthwhile to explore this
|
576
|
-
approach a little further and see if I can develop a GraphQL interface that
|
577
|
-
can be used to build the sort of rich apps that I want to see. The demo I''ve
|
578
|
-
created uses SPARQL under the hood to provide responses to the GraphQL queries.
|
579
|
-
So in this sense it''s not replacing SPARQL, it''s simply providing a (hopefully)
|
580
|
-
simpler overlay on top of SPARQL so that we can retrieve the data we want
|
581
|
-
without having to learn the intricacies of SPARQL, nor how Wikidata models
|
582
|
-
publications and people.</p>","tags":["GraphQL","SPARQL","WikiCite","Wikidata"],"language":"en","references":[]}]}'
|
583
|
-
recorded_at: Sun, 18 Jun 2023 15:23:48 GMT
|
584
|
-
recorded_with: VCR 6.1.0
|
174
|
+
tools to index the literature. His work on uBio with David...","published_at":1671213240,"updated_at":1671264743,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
175
|
+
Page"}],"image":null,"tags":["David Remsen","obituary","uBio"],"language":"en","reference":[]},{"id":"30c78d9d-2e50-49db-9f4f-b3baa060387b","doi":"https://doi.org/10.59350/zc4qc-77616","url":"https://iphylo.blogspot.com/2022/09/does-anyone-cite-taxonomic-treatments.html","title":"Does
|
176
|
+
anyone cite taxonomic treatments?","summary":"Taxonomic treatments have come
|
177
|
+
up in various discussions I''m involved in, and I''m curious as to whether
|
178
|
+
they are actually being used, in particular, whether they are actually being
|
179
|
+
cited. Consider the following quote: The taxa are described in taxonomic treatments,
|
180
|
+
well defined sections of scientific publications (Catapano 2019). They include
|
181
|
+
a nomenclatural section and one or more sections including descriptions, material
|
182
|
+
citations referring to studied specimens, or notes ecology and...","published_at":1662050940,"updated_at":1662050991,"indexed_at":1688982503,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
183
|
+
Page"}],"image":null,"tags":[],"language":"en","reference":[]},{"id":"c6b101f4-bfbc-4d01-921d-805c43c85757","doi":"https://doi.org/10.59350/j77nc-e8x98","url":"https://iphylo.blogspot.com/2022/08/linking-taxonomic-names-to-literature.html","title":"Linking
|
184
|
+
taxonomic names to the literature","summary":"Just some thoughts as I work
|
185
|
+
through some datasets linking taxonomic names to the literature. In the diagram
|
186
|
+
above I''ve tried to capture the different situatios I encounter. Much of
|
187
|
+
the work I''ve done on this has focussed on case 1 in the diagram: I want
|
188
|
+
to link a taxonomic name to an identifier for the work in which that name
|
189
|
+
was published. In practise this means linking names to DOIs. This has the
|
190
|
+
advantage of linking to a citable indentifier, raising questions such as whether
|
191
|
+
citations...","published_at":1661188740,"updated_at":1661188748,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
192
|
+
Page"}],"image":null,"tags":[],"language":"en","reference":[]},{"id":"e8e95aaf-bacb-4b5a-bf91-54e903526ab2","doi":"https://doi.org/10.59350/ndtkv-6ve80","url":"https://iphylo.blogspot.com/2021/11/revisiting-rss-to-monitor-latests.html","title":"Revisiting
|
193
|
+
RSS to monitor the latest taxonomic research","summary":"Over a decade ago
|
194
|
+
RSS (RDF Site Summary or Really Simple Syndication) was attracting a lot of
|
195
|
+
interest as a way to integrate data across various websites. Many science
|
196
|
+
publishers would provide a list of their latest articles in XML in one of
|
197
|
+
three flavours of RSS (RDF, RSS, Atom). This led to tools such as uBioRSS
|
198
|
+
[1] and my own e-Biosphere Challenge: visualising biodiversity digitisation
|
199
|
+
in real time. It was a time of enthusiasm for aggregating lots of data, such
|
200
|
+
as the ill-fated PLoS...","published_at":1637700780,"updated_at":1637701172,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
201
|
+
Page"}],"image":null,"tags":["geocoding","NoCode","RSS"],"language":"en","reference":[{"doi":"https://doi.org/10.1093/bioinformatics/btm109","key":"ref1"},{"doi":"https://doi.org/10.1371/journal.pone.0019491","key":"ref2"},{"key":"ref3","url":"http://arxiv.org/abs/1711.00046"}]},{"id":"20b9d31e-513f-496b-b399-4215306e1588","doi":"https://doi.org/10.59350/en7e9-5s882","url":"https://iphylo.blogspot.com/2022/04/obsidian-markdown-and-taxonomic-trees.html","title":"Obsidian,
|
202
|
+
markdown, and taxonomic trees","summary":"Returning to the subject of personal
|
203
|
+
knowledge graphs Kyle Scheer has an interesting repository of Markdown files
|
204
|
+
that describe academic disciplines at https://github.com/kyletscheer/academic-disciplines
|
205
|
+
(see his blog post for more background). If you add these files to Obsidian
|
206
|
+
you get a nice visualisation of a taxonomy of academic disciplines. The applications
|
207
|
+
of this to biological taxonomy seem obvious, especially as a tool like Obsidian
|
208
|
+
enables all sorts of interesting links to be added...","published_at":1649365620,"updated_at":1649366134,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
209
|
+
Page"}],"image":null,"tags":["markdown","obsidian"],"language":"en","reference":[]},{"id":"d811172e-7798-403c-a83d-3d5317a9657e","doi":"https://doi.org/10.59350/w18j9-v7j10","url":"https://iphylo.blogspot.com/2022/08/papers-citing-data-that-cite-papers.html","title":"Papers
|
210
|
+
citing data that cite papers: CrossRef, DataCite, and the Catalogue of Life","summary":"Quick
|
211
|
+
notes to self following on from a conversation about linking taxonomic names
|
212
|
+
to the literature. Is there a way to turn those links into countable citations
|
213
|
+
(even if just one per database) for Google Scholar?— Wayne Maddison (@WayneMaddison)
|
214
|
+
August 3, 2022 There are different sorts of citation: Paper cites another
|
215
|
+
paper Paper cites a dataset Dataset cites a paper Citation type (1) is largely
|
216
|
+
a solved problem (although there are issues of the ownership and use of this...","published_at":1659526380,"updated_at":1659526393,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
217
|
+
Page"}],"image":null,"tags":["Catalogue of Life","citation","CrossRef","DataCite","DOI"],"language":"en","reference":[]},{"id":"a41163e0-9c9a-41e0-a141-f772663f2f32","doi":"https://doi.org/10.59350/gf1dw-n1v47","url":"https://iphylo.blogspot.com/2023/03/dugald-stuart-page-1936-2022.html","title":"Dugald
|
218
|
+
Stuart Page 1936-2022","summary":"My dad died last weekend. Below is a notice
|
219
|
+
in today''s New Zealand Herald. I''m in New Zealand for his funeral. Don''t
|
220
|
+
really have the words for this right now.","published_at":1678762800,"updated_at":1679469956,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
221
|
+
Page"}],"image":null,"tags":[],"language":"en","reference":[]},{"id":"8bc3fea6-cb86-4344-8dad-f312fbf58041","doi":"https://doi.org/10.59350/t6fb9-4fn44","url":"https://iphylo.blogspot.com/2021/12/the-business-of-extracting-knowledge.html","title":"The
|
222
|
+
Business of Extracting Knowledge from Academic Publications","summary":"Markus
|
223
|
+
Strasser (@mkstra write a fascinating article entitled \"The Business of Extracting
|
224
|
+
Knowledge from Academic Publications\". I spent months working on domain-specific
|
225
|
+
search engines and knowledge discovery apps for biomedicine and eventually
|
226
|
+
figured that synthesizing \"insights\" or building knowledge graphs by machine-reading
|
227
|
+
the academic literature (papers) is *barely useful* :https://t.co/eciOg30Odc—
|
228
|
+
Markus Strasser (@mkstra) December 7, 2021 His TL;DR: TL;DR: I...","published_at":1639180860,"updated_at":1639180881,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
229
|
+
Page"}],"image":null,"tags":["ai","business model","text mining"],"language":"en","reference":[]},{"id":"96fa91d5-459c-482f-aa38-dda6e0a30e20","doi":"https://doi.org/10.59350/7esgr-61v1","url":"https://iphylo.blogspot.com/2022/01/large-graph-viewer-experiments.html","title":"Large
|
230
|
+
graph viewer experiments","summary":"I keep returning to the problem of viewing
|
231
|
+
large graphs and trees, which means my hard drive has accumulated lots of
|
232
|
+
failed prototypes. Inspired by some recent discussions on comparing taxonomic
|
233
|
+
classifications I decided to package one of these (wildly incomplete) prototypes
|
234
|
+
up so that I can document the idea and put the code somewhere safe. Very cool,
|
235
|
+
thanks for sharing this-- the tree diff is similar to what J Rees has been
|
236
|
+
cooking up lately with his ''cl diff'' tool. I''ll tag...","published_at":1641122700,"updated_at":1641122959,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
237
|
+
Page"}],"image":null,"tags":["Google Maps","graph","Mammal Species of the
|
238
|
+
World","mammals","taxonomy"],"language":"en","reference":[]},{"id":"ab5a6e04-d55e-4901-8269-9eea65ce7178","doi":"https://doi.org/10.59350/enxas-arj18","url":"https://iphylo.blogspot.com/2022/08/can-we-use-citation-graph-to-measure.html","title":"Can
|
239
|
+
we use the citation graph to measure the quality of a taxonomic database?","summary":"More
|
240
|
+
arm-waving notes on taxonomic databases. I''ve started to add data to ChecklistBank
|
241
|
+
and this has got me thinking about the issue of data quality. When you add
|
242
|
+
data to ChecklistBank you are asked to give a measure of confidence based
|
243
|
+
on the Catalogue of Life Checklist Confidence system of one - five stars:
|
244
|
+
★ - ★★★★★. I''m scepetical about the notion of confidence or \"trust\" when
|
245
|
+
it is reduced to a star system (see also Can you trust EOL?). I could literally
|
246
|
+
pick any number of stars, there''s...","published_at":1661351580,"updated_at":1661351908,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
247
|
+
Page"}],"image":null,"tags":["Bibliography of Life","citation","synonymy","taxonomic
|
248
|
+
databases"],"language":"en","reference":[]},{"id":"0807f515-f31d-4e2c-9e6f-78c3a9668b9d","doi":"https://doi.org/10.59350/ymc6x-rx659","url":"https://iphylo.blogspot.com/2022/09/dna-barcoding-as-intergenerational.html","title":"DNA
|
249
|
+
barcoding as intergenerational transfer of taxonomic knowledge","summary":"I
|
250
|
+
tweeted about this but want to bookmark it for later as well. The paper “A
|
251
|
+
molecular-based identification resource for the arthropods of Finland” doi:10.1111/1755-0998.13510
|
252
|
+
contains the following: …the annotated barcode records assembled by FinBOL
|
253
|
+
participants represent a tremendous intergenerational transfer of taxonomic
|
254
|
+
knowledge … the time contributed by current taxonomists in identifying and
|
255
|
+
contributing voucher specimens represents a great gift to future generations
|
256
|
+
who will benefit...","published_at":1663150320,"updated_at":1664459850,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
257
|
+
Page"}],"image":null,"tags":[],"language":"en","reference":[]},{"id":"8aea47e4-f227-45f4-b37b-0454a8a7a3ff","doi":"https://doi.org/10.59350/m48f7-c2128","url":"https://iphylo.blogspot.com/2023/04/chatgpt-semantic-search-and-knowledge.html","title":"ChatGPT,
|
258
|
+
semantic search, and knowledge graphs","summary":"One thing about ChatGPT
|
259
|
+
is it has opened my eyes to some concepts I was dimly aware of but am only
|
260
|
+
now beginning to fully appreciate. ChatGPT enables you ask it questions, but
|
261
|
+
the answers depend on what ChatGPT “knows”. As several people have noted,
|
262
|
+
what would be even better is to be able to run ChatGPT on your own content.
|
263
|
+
Indeed, ChatGPT itself now supports this using plugins. Paul Graham GPT However,
|
264
|
+
it’s still useful to see how to add ChatGPT functionality to your own content
|
265
|
+
from...","published_at":1680535800,"updated_at":1680535924,"indexed_at":1689006804,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
266
|
+
Page"}],"image":null,"tags":[],"language":"en","reference":[]},{"id":"9d19993f-a228-4883-8933-be5b5459530d","doi":"https://doi.org/10.59350/r3g44-d5s15","url":"https://iphylo.blogspot.com/2023/06/a-taxonomic-search-engine.html","title":"A
|
267
|
+
taxonomic search engine","summary":"Tony Rees commented on my recent post
|
268
|
+
Ten years and a million links. I’ve responded to some of his comments, but
|
269
|
+
I think the bigger question deserves more space, hence this blog post. Tony’s
|
270
|
+
comment Hi Rod, I like what you’re doing. Still struggling (a little) to find
|
271
|
+
the exact point where it answers the questions that are my “entry points”
|
272
|
+
so to speak, which (paraphrasing a post of yours from some years back) start
|
273
|
+
with: Is this a name that “we” (the human race I suppose) recognise as...","published_at":1687016280,"updated_at":1687016300,"indexed_at":1688982864,"authors":[{"url":"https://orcid.org/0000-0002-7101-9767","name":"Roderic
|
274
|
+
Page"}],"image":null,"tags":[],"language":"en","reference":[]}]}'
|
275
|
+
recorded_at: Mon, 10 Jul 2023 20:41:57 GMT
|
276
|
+
recorded_with: VCR 6.2.0
|