commonmeta-ruby 3.2.0 → 3.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/bin/commonmeta +1 -1
- data/lib/commonmeta/cli.rb +7 -0
- data/lib/commonmeta/metadata_utils.rb +2 -2
- data/lib/commonmeta/readers/{json_post_reader.rb → json_feed_reader.rb} +16 -4
- data/lib/commonmeta/utils.rb +6 -2
- data/lib/commonmeta/version.rb +1 -1
- data/spec/cli_spec.rb +8 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_CLI/json_feed/blog_post.yml +360 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed/citation_style_language.yml +360 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed/citation_style_language_blog.yml +360 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed/front-matter_blog.yml +1034 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed/upstream.yml +2438 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed/upstream_blog.yml +2438 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/blogger_post.yml +94 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/ghost_post_with_doi.yml +117 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/jekyll_post.yml +87 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_item_metadata/wordpress_post.yml +163 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_metadata/blogger_post.yml +141 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_metadata/ghost_post_with_doi.yml +164 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_metadata/jekyll_post.yml +134 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_feed_metadata/wordpress_post.yml +210 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_post_metadata/blogger_post.yml +47 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_post_metadata/ghost_post_with_doi.yml +47 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_post_metadata/jekyll_post.yml +47 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/get_json_post_metadata/wordpress_post.yml +47 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/json_feed_url/front-matter_blog.yml +221 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_feed_item_from_rogue_scholar_with_doi.yml +163 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_feed_item_from_upstream_blog.yml +243 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_post_from_rogue_scholar_with_doi.yml +47 -0
- data/spec/fixtures/vcr_cassettes/Commonmeta_Metadata/write_metadata_as_crossref/json_post_from_upstream_blog.yml +47 -0
- data/spec/readers/{json_post_reader_spec.rb → json_feed_reader_spec.rb} +23 -2
- data/spec/utils_spec.rb +8 -0
- data/spec/writers/crossref_xml_writer_spec.rb +2 -2
- metadata +20 -3
@@ -0,0 +1,1034 @@
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: get
|
5
|
+
uri: https://rogue-scholar.org/api/blogs/f0m0e38
|
6
|
+
body:
|
7
|
+
encoding: UTF-8
|
8
|
+
string: ''
|
9
|
+
headers:
|
10
|
+
Connection:
|
11
|
+
- close
|
12
|
+
Host:
|
13
|
+
- rogue-scholar.org
|
14
|
+
User-Agent:
|
15
|
+
- http.rb/5.1.1
|
16
|
+
response:
|
17
|
+
status:
|
18
|
+
code: 200
|
19
|
+
message: OK
|
20
|
+
headers:
|
21
|
+
Age:
|
22
|
+
- '0'
|
23
|
+
Cache-Control:
|
24
|
+
- public, max-age=0, must-revalidate
|
25
|
+
Content-Length:
|
26
|
+
- '91392'
|
27
|
+
Content-Type:
|
28
|
+
- application/json; charset=utf-8
|
29
|
+
Date:
|
30
|
+
- Sun, 04 Jun 2023 13:34:33 GMT
|
31
|
+
Etag:
|
32
|
+
- '"vm2lu05r3q1yh2"'
|
33
|
+
Server:
|
34
|
+
- Vercel
|
35
|
+
Strict-Transport-Security:
|
36
|
+
- max-age=63072000
|
37
|
+
X-Matched-Path:
|
38
|
+
- "/api/blogs/[slug]"
|
39
|
+
X-Vercel-Cache:
|
40
|
+
- MISS
|
41
|
+
X-Vercel-Id:
|
42
|
+
- fra1::iad1::4lpjf-1685885673258-c641c009bf16
|
43
|
+
Connection:
|
44
|
+
- close
|
45
|
+
body:
|
46
|
+
encoding: UTF-8
|
47
|
+
string: '{"id":"f0m0e38","title":"Front Matter","description":"\nThe Front Matter
|
48
|
+
Blog covers the intersection of science and technology since 2007.","language":"en","icon":null,"favicon":"https://blog.front-matter.io/favicon.png","feed_url":"https://blog.front-matter.io/atom/","feed_format":"application/atom+xml","home_page_url":"https://blog.front-matter.io/","indexed_at":"2023-01-02","license":"https://creativecommons.org/licenses/by/4.0/legalcode","generator":"Ghost","category":"Engineering
|
49
|
+
and Technology","items":[{"id":"https://doi.org/10.53731/nfa3v-h9q90","short_id":"1xdn0e03","url":"https://blog.front-matter.io/posts/dog-food-persistent-identifiers-and-metadata/","title":"Dog
|
50
|
+
food, persistent identifiers, and metadata","summary":"I am a big fan of dog
|
51
|
+
food, and I wrote about this topic already seven years ago:Eating your own
|
52
|
+
dog food is a slang term to describe that an organization should itself use
|
53
|
+
the products and services it...","date_published":"2023-04-17T17:08:26Z","date_modified":"2023-04-17T17:20:25Z","authors":[{"url":"https://orcid.org/0000-0003-1419-2405","name":"Martin
|
54
|
+
Fenner"}],"image":"https://images.unsplash.com/photo-1608408891486-f5cade977d19?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDR8fGRvZyUyMGZvb2R8ZW58MHx8fHwxNjgxNzQyOTYy&ixlib=rb-4.0.3&q=80&w=2000","content_html":"
|
55
|
+
<p><img src=\"https://images.unsplash.com/photo-1608408891486-f5cade977d19?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDR8fGRvZyUyMGZvb2R8ZW58MHx8fHwxNjgxNzQyOTYy&ixlib=rb-4.0.3&q=80&w=2000\"></p><p>I
|
56
|
+
am a big fan of dog food, and I <a href=\"https://doi.org/10.53731/r79vxn1-97aq74v-ag58n\">wrote
|
57
|
+
about this topic</a> already seven years ago:</p><blockquote><a href=\"https://newrepublic.com/article/115349/dogfooding-tech-slang-working-out-glitches\">Eating
|
58
|
+
your own dog food</a> is a slang term to describe that an organization should
|
59
|
+
itself use the products and services it provides. </blockquote><p>One of the
|
60
|
+
major projects I am working on right now is the <a href=\"https://rogue-scholar.org\">Rogue
|
61
|
+
Scholar</a> science blog archive <a href=\"https://doi.org/10.53731/z9v2s-bh329\">that
|
62
|
+
launched</a> at the beginning of the month. As part of this work – but also
|
63
|
+
because I am very interested in this – I read a lot of science blogs. And
|
64
|
+
today I released an update of the Rogue Scholar that makes this easier.</p><h3
|
65
|
+
id=\"persistent-identifiers-for-science-blogs\">Persistent identifiers for
|
66
|
+
science blogs</h3><p>People who know me know that I care about persistent
|
67
|
+
identifiers for scholarly resources. I have worked for seven years for <a
|
68
|
+
href=\"https://datacite.org\">DataCite</a>, a DOI registration to register
|
69
|
+
datasets, software, and other non-textual resources. I was involved in the
|
70
|
+
launch of <a href=\"https://orcid.org\">ORCID</a> (identifiers for researchers)
|
71
|
+
in 2012 and <a href=\"https://ror.org\">ROR</a> (identifiers for research
|
72
|
+
organizations) in 2019. So it shouldn''t surprise anyone that I am officially
|
73
|
+
announcing the Rogue Scholar identifier for science blogs today. Each blog
|
74
|
+
that has registered with the Rogue Scholar is uniquely identified, e.g. </p><ul><li>Upstream
|
75
|
+
<a href=\"https://rogue-scholar.org/pm0p222\">https://rogue-scholar.org/pm0p222</a>,</li><li>GigaBlog
|
76
|
+
<a href=\"https://rogue-scholar.org/3ffcd46\">https://rogue-scholar.org/3ffcd46</a>,
|
77
|
+
and of course</li><li>Front Matter <a href=\"https://rogue-scholar.org/f0m0e38\">https://rogue-scholar.org/f0m0e38</a></li></ul><p>Persistent
|
78
|
+
identifiers should not have any semantic meaning (e.g. the blog name) in them,
|
79
|
+
as names can change over time. And they should not be linked to a domain name,
|
80
|
+
(e.g. upstream.force11.org) as those might also change. The Rogue Scholar
|
81
|
+
identifier uses a 7-digit random string generated by the <a href=\"https://github.com/front-matter/base32-url\">base32
|
82
|
+
algorithm</a> and a two-digit checksum (the Front Matter identifier for example
|
83
|
+
was generated with the random number 16127113320). DataCite, ROR, and the
|
84
|
+
repository <a href=\"https://zenodo.org\">Zenodo</a> use similarly constructed
|
85
|
+
unique identifiers. Their main advantage over <a href=\"https://en.wikipedia.org/wiki/Universally_unique_identifier\">UUIDs</a>
|
86
|
+
is that they are easier to handle because of their compact size – there are
|
87
|
+
still more than three billion unique strings for the Rogue Scholar identifier.
|
88
|
+
Finally, persistent identifiers should be actionable, which means expressed
|
89
|
+
as URLs that a human or machine can follow.</p><p>Why did I not use International
|
90
|
+
Standard Serial Numbers (<a href=\"https://www.issn.org/\">ISSNs</a>), well-established
|
91
|
+
identifiers that also work for blogs (the Front Matter blog has ISSN <a href=\"https://portal.issn.org/resource/ISSN/2749-9952\">2749-9952</a>)?
|
92
|
+
Why ISSN registration can be easy and cheap, registration can become an issue,
|
93
|
+
especially for new blogs that are just beginning to publish. And ISSNs have
|
94
|
+
only the most basic metadata (e.g. title, country). And why not use digital
|
95
|
+
object identifiers (<a href=\"https://www.doi.org/\">DOIs</a>)? They have
|
96
|
+
traditionally been used for scholarly outputs such as journal articles, datasets,
|
97
|
+
and <a href=\"https://doi.org/10.53731/fezg09h-hgn1gzm\">blog posts</a>. While
|
98
|
+
you can register DOIs for serials such as journals, conference proceedings,
|
99
|
+
or blogs, there is currently no standard practice to do so.</p><h3 id=\"metadata-for-science-blogs\">Metadata
|
100
|
+
for science blogs</h3><p>Persistent identifiers are not really useful without
|
101
|
+
meaningful metadata. For science blogs, this means at least the following:</p><ul><li>Blog
|
102
|
+
name</li><li>Blog short description</li><li>Blog URL</li><li>Alternate identifiers,
|
103
|
+
e.g ISSN and/or DOI</li><li>Blog editor(s)</li><li>License for the content,
|
104
|
+
e.g Creative Commons Attribution (<a href=\"https://creativecommons.org/licenses/by/4.0/\">CC-BY</a>)</li><li>Subject
|
105
|
+
area(s) for the content, e.g. aligned with the <a href=\"https://en.wikipedia.org/wiki/Fields_of_Science_and_Technology\">OECD
|
106
|
+
Fields of Science and Technology</a></li></ul><p>For the blogs participating
|
107
|
+
in the Rogue Scholar, I am collecting this information and will make it available
|
108
|
+
in the Rogue Scholar search. To not start from scratch, I am using the metadata
|
109
|
+
available from most blogs via <a href=\"https://doi.org/10.53731/d6vdvbt-tffmezj\">RSS
|
110
|
+
or Atom feed</a>. For some information, e.g. license or subject area, I need
|
111
|
+
to ask additional questions to the blog editor.</p><p>RSS and Atom both use
|
112
|
+
XML, rather than JSON, which is much more pleasant to work with. Therefore
|
113
|
+
– after the initial conversion of RSS or Atom XML – I can use <a href=\"https://www.jsonfeed.org/\">JSON
|
114
|
+
Feed</a> to describe blog metadata, and the format can be extended to the
|
115
|
+
needs of the Rogue Scholar. To fetch the JSON Feed of a blog included in the
|
116
|
+
Rogue Scholar, use the identifier. Either by appending <code>.json</code>
|
117
|
+
to the identifier (e.g. <a href=\"https://rogue-scholar.org/h56tk29.json\">https://rogue-scholar.org/h56tk29.json</a>)
|
118
|
+
or by entering the identifier (<a href=\"https://rogue-scholar.org/h56tk29.json\">https://rogue-scholar.org/h56tk29</a>)
|
119
|
+
in your RSS reader. The reader will automatically find the JSON Feed via the
|
120
|
+
link tag in the page header:</p><pre><code><link rel=\"alternate\" title=\"Jabberwocky
|
121
|
+
Ecology\" type=\"application/feed+json\" href=\"https://rogue-scholar.org/h56tk29.json\"/></code></pre><p>The
|
122
|
+
RSS Reader (assuming it supports JSON Feed, as most readers do) will subscribe
|
123
|
+
you to the JSON Feed of the blog, simplifying the reading of science blogs.
|
124
|
+
More work is needed to polish the RSS/Atom Feed conversion to JSON Feed done
|
125
|
+
by the Rogue Scholar and streamline subscribing to multiple blogs at once,
|
126
|
+
e.g. using <a href=\"https://doi.org/10.53731/wa7k5-v4t16\">OPML</a>. </p><p>JSON
|
127
|
+
Feed can also be used for the metadata and content of blog posts, so again
|
128
|
+
I don''t need to use XML, e.g. Journal Article Tag Suite (<a href=\"https://jats.nlm.nih.gov/\">JATS</a>).
|
129
|
+
For blog posts, I will continue to <a href=\"https://doi.org/10.53731/rb7xw01-97aq74v-ag7qh\">use
|
130
|
+
DOIs</a>, as they work well, and I am making progress with Rogue Scholar integration
|
131
|
+
(see for example this blog using DOIs already: <a href=\"https://rogue-scholar.org/f4wdg32\">https://rogue-scholar.org/f4wdg32</a>)</p><h3
|
132
|
+
id=\"bringing-everything-together\">Bringing everything together</h3><p>How
|
133
|
+
does the above help with finding, reading, sharing, or otherwise reusing science
|
134
|
+
blogs? The work released today should make it easier to find interesting science
|
135
|
+
blogs via the Rogue Scholar and subscribe to them via your RSS reader of choice.
|
136
|
+
Over time we will hopefully see evolving community standards regarding blog
|
137
|
+
persistent identifiers and metadata, following the <a href=\"https://www.go-fair.org/fair-principles/\">FAIR
|
138
|
+
Principles</a>, while at the same time pushing hard for <a href=\"https://www.scienceeurope.org/our-priorities/open-access/diamond-open-access/\">Diamond
|
139
|
+
Open Access</a>, keeping the cost and technical complexity affordable.</p>
|
140
|
+
","tags":["Feature"],"language":"en"},{"id":"https://doi.org/10.53731/z9v2s-bh329","short_id":"y2d1rjgr","url":"https://blog.front-matter.io/posts/rogue-scholar-open-for-business/","title":"The
|
141
|
+
Rogue Scholar is now open for business","summary":"The Rogue Scholar science
|
142
|
+
blog archive launched with limited functionality on April 3rd. Interested
|
143
|
+
science blogs can go to the sign-up page, provide some basic information via
|
144
|
+
the sign-up form, and then will...","date_published":"2023-04-04T08:43:36Z","date_modified":"2023-04-04T09:31:14Z","authors":[{"url":"https://orcid.org/0000-0003-1419-2405","name":"Martin
|
145
|
+
Fenner"}],"image":"https://images.unsplash.com/photo-1575663620136-5ebbfcc2c597?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDR8fG9wZW4lMjBmb3IlMjBidXNpbmVzc3xlbnwwfHx8fDE2ODA1OTI3NTU&ixlib=rb-4.0.3&q=80&w=2000","content_html":"
|
146
|
+
<p><img src=\"https://images.unsplash.com/photo-1575663620136-5ebbfcc2c597?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDR8fG9wZW4lMjBmb3IlMjBidXNpbmVzc3xlbnwwfHx8fDE2ODA1OTI3NTU&ixlib=rb-4.0.3&q=80&w=2000\"></p><p>The
|
147
|
+
<a href=\"https://rogue-scholar.org/\">Rogue Scholar</a> science blog archive
|
148
|
+
launched with limited functionality on April 3rd. Interested science blogs
|
149
|
+
can go to the sign-up page, provide some basic information via the <a href=\"https://jvinjjenjik.typeform.com/to/uxgAsHPl\">sign-up
|
150
|
+
form</a>, and then will be added to the Rogue Scholar archive within two business
|
151
|
+
days. </p><p>To be included in the service, your blog needs to:</p><ul><li>be
|
152
|
+
about science or scholarship and written in English or German (more languages
|
153
|
+
will follow later, reach out to me if you can help),</li><li>make the full-text
|
154
|
+
content available via RSS feed and distributed under the terms of the Creative
|
155
|
+
Commons Attribution license (<a href=\"https://creativecommons.org/licenses/by/4.0/legalcode\">CC-BY</a>).</li></ul><p>Blogs
|
156
|
+
that have signed up for the service (more than twenty so far) are listed in
|
157
|
+
the <a href=\"https://rogue-scholar.org/blogs\">Rogue Scholar catalog of science
|
158
|
+
blogs</a> that <a href=\"https://doi.org/10.53731/n7vvs-h6995\">launched last
|
159
|
+
week</a>. And since yesterday summaries of the latest fifteen blog posts of
|
160
|
+
each blog are also available.</p><figure class=\"kg-card kg-image-card kg-card-hascaption\"><img
|
161
|
+
src=\"https://blog.front-matter.io/content/images/2023/04/Bildschirmfoto-2023-04-04-um-10.12.58.png\"
|
162
|
+
class=\"kg-image\" alt loading=\"lazy\" width=\"1882\" height=\"1428\" srcset=\"https://blog.front-matter.io/content/images/size/w600/2023/04/Bildschirmfoto-2023-04-04-um-10.12.58.png
|
163
|
+
600w, https://blog.front-matter.io/content/images/size/w1000/2023/04/Bildschirmfoto-2023-04-04-um-10.12.58.png
|
164
|
+
1000w, https://blog.front-matter.io/content/images/size/w1600/2023/04/Bildschirmfoto-2023-04-04-um-10.12.58.png
|
165
|
+
1600w, https://blog.front-matter.io/content/images/2023/04/Bildschirmfoto-2023-04-04-um-10.12.58.png
|
166
|
+
1882w\" sizes=\"(min-width: 720px) 720px\"><figcaption><a href=\"https://rogue-scholar.org/blogs/pm0p222\">Blog
|
167
|
+
posts displayed at the Rogue Scholar</a></figcaption></figure><p>These summaries
|
168
|
+
(precisely the information you get in the RSS feed) serve two purposes:</p><ul><li>for
|
169
|
+
readers: learn more about that particular science blog. Reading the full-text
|
170
|
+
post or other blog posts is only one click away</li><li>for blog authors and
|
171
|
+
Rogue Scholar staff: tweak the blog and/or Rogue Scholar if there are issues
|
172
|
+
with archiving. </li></ul><p>The screenshot highlights several considerations
|
173
|
+
when using the RSS Feed to archive a science blog in the Rogue Scholar:</p><ul><li>optional
|
174
|
+
but desired metadata, e.g logo, description, and language for blogs or description,
|
175
|
+
tags, and feature image for blog posts</li><li>handling authors, including
|
176
|
+
full names instead of usernames, multiple authors, and author identifiers
|
177
|
+
(ORCID)</li><li>handling DOIs, including exposing them in the RSS feed, and
|
178
|
+
making sure no DOI exists for the post yet</li></ul><p>The Rogue Scholar is
|
179
|
+
now open for business, and I hope the limited functionality (or <a href=\"https://www.zentao.pm/blog/mvp-minimum-viable-product-965.html\">minimum
|
180
|
+
viable product</a>) launched this week makes it an attractive service for
|
181
|
+
blog readers and authors to try out. The next big milestone is the launch
|
182
|
+
of the full-text index for searching and archiving, and that is planned to
|
183
|
+
happen within the next three months. Followed by DOI registration for blog
|
184
|
+
posts.</p> ","tags":["News"],"language":null},{"id":"https://doi.org/10.53731/h4b6c-h1444","short_id":"j3ejvwep","url":"https://blog.front-matter.io/posts/feedback-for-blog-publishers/","title":"Feedback
|
185
|
+
for science blog publishers","summary":"The Rogue Scholar science blog archive
|
186
|
+
launched last week. Going forward the focus is on improving the service and
|
187
|
+
adding more blogs. This includes giving blog authors feedback on how they
|
188
|
+
can improve their...","date_published":"2023-04-11T12:31:40Z","date_modified":"2023-04-14T20:50:32Z","authors":[{"url":"https://orcid.org/0000-0003-1419-2405","name":"Martin
|
189
|
+
Fenner"}],"image":"https://blog.front-matter.io/content/images/2023/04/Bildschirmfoto-2023-04-11-um-13.14.02.png","content_html":"
|
190
|
+
<p><img src=\"https://blog.front-matter.io/content/images/2023/04/Bildschirmfoto-2023-04-11-um-13.14.02.png\"></p><p>The
|
191
|
+
<a href=\"https://rogue-scholar.org/\">Rogue Scholar</a> science blog archive
|
192
|
+
<a href=\"https://doi.org/10.53731/z9v2s-bh329\">launched last week</a>. Going
|
193
|
+
forward the focus is on improving the service and adding more blogs. This
|
194
|
+
includes giving blog authors feedback on how they can improve their RSS/Atom
|
195
|
+
feeds – used by the Rogue Scholar to collect and archive the blog content.</p><h3
|
196
|
+
id=\"feedback-for-science-blog-publishers\">Feedback for science blog publishers</h3><p>A
|
197
|
+
good starting point is author information, which often can be improved. The
|
198
|
+
first step is to support multiple authors and support their full (given and
|
199
|
+
family) names instead of usernames. It is useful to include ORCID author identifiers,
|
200
|
+
best done by using the author website field of the blogging platform. This
|
201
|
+
information can then be included in the blog <a href=\"https://www.rfc-editor.org/rfc/rfc4287\">Atom
|
202
|
+
feed</a>, which works better for this than <a href=\"https://en.wikipedia.org/wiki/RSS\">RSS
|
203
|
+
feeds</a>.</p><p>The blog (RSS or Atom) feed includes a link for each blog
|
204
|
+
post but also an <strong>id</strong> (Atom) or <strong>guid</strong> (RSS).
|
205
|
+
Ideally, this id/guid is globally unique, does not change over time, and can
|
206
|
+
be used as a web link. <a href=\"https://ask.library.uic.edu/faq/345899\">DOIs</a>
|
207
|
+
are a perfect fit for this id/guid field, and several blogs included in the
|
208
|
+
Rogue Scholar do this already (<a href=\"https://rogue-scholar.org/blogs/f0m0e38\">this
|
209
|
+
blog</a> but also <a href=\"https://rogue-scholar.org/blogs/pm0p222\">Upstream</a>).
|
210
|
+
Many blogging platforms have a <a href=\"https://developer.wordpress.org/reference/functions/wp_get_canonical_url/\">canonical_url</a>
|
211
|
+
field that can be used to store the DOI, separate from the URL.</p><p>Abstracts
|
212
|
+
are useful for blog posts and widely supported. Unfortunately, there is no
|
213
|
+
standard way to provide them in RSS or Atom feeds. A good practice is to use
|
214
|
+
text and not HTML and to limit the total number of characters (the Rogue Scholar
|
215
|
+
limits abstracts to 210 characters).</p><p>Feature images for blog posts are
|
216
|
+
again widely used but there is no standard way to do this in RSS or Atom feeds.
|
217
|
+
Examples of Rogue Scholar blogs using feature images are <a href=\"https://rogue-scholar.org/blogs/n6x4a73\">Chris
|
218
|
+
Hartgerink</a>, <a href=\"https://rogue-scholar.org/blogs/h7bpg11\">OA.Works</a>
|
219
|
+
and <a href=\"https://rogue-scholar.org/blogs/f4wdg32\">Syldavia Gazette</a>.</p><h3
|
220
|
+
id=\"blog-statistics\">Blog statistics</h3><p>This week I added <a href=\"https://rogue-scholar.org/#stats\">basic
|
221
|
+
statistics</a> for the Rogue Scholar that give preliminary insights into the
|
222
|
+
kind of science blogs covered by the Rogue Scholar. The <strong>category</strong>
|
223
|
+
is the top-level classification of the <a href=\"https://www.oecd.org/science/inno/38235147.pdf\">OECD
|
224
|
+
Fields of Science and Technology</a>. Many blogs cover Natural Sciences, Engineering
|
225
|
+
and Technology, Social Sciences – Health and Medical Sciences, Humanities,
|
226
|
+
and Agricultural Sciences are covered less. Almost all currently included
|
227
|
+
blogs are in the English <strong>language</strong>, please reach out if you
|
228
|
+
manage a blog in another language. Knowing the blogging <strong>platform</strong>
|
229
|
+
helps integrate the various RSS feeds into the Rogue Scholar, and the results
|
230
|
+
are as expected. Wordpress is the most popular blogging platform, but science
|
231
|
+
blogs also use a variety of other platforms, including Ghost, Medium, and
|
232
|
+
Blogger. Another interesting key performance indicator (KPI) is the total
|
233
|
+
number of blogs and blog posts included, but this needs more work as this
|
234
|
+
information is not immediately available.</p><figure class=\"kg-card kg-image-card
|
235
|
+
kg-card-hascaption\"><img src=\"https://blog.front-matter.io/content/images/2023/04/Bildschirmfoto-2023-04-11-um-13.19.27.png\"
|
236
|
+
class=\"kg-image\" alt loading=\"lazy\" width=\"2000\" height=\"716\" srcset=\"https://blog.front-matter.io/content/images/size/w600/2023/04/Bildschirmfoto-2023-04-11-um-13.19.27.png
|
237
|
+
600w, https://blog.front-matter.io/content/images/size/w1000/2023/04/Bildschirmfoto-2023-04-11-um-13.19.27.png
|
238
|
+
1000w, https://blog.front-matter.io/content/images/size/w1600/2023/04/Bildschirmfoto-2023-04-11-um-13.19.27.png
|
239
|
+
1600w, https://blog.front-matter.io/content/images/2023/04/Bildschirmfoto-2023-04-11-um-13.19.27.png
|
240
|
+
2152w\" sizes=\"(min-width: 720px) 720px\"><figcaption><a href=\"https://rogue-scholar.org/#stats\">Basic
|
241
|
+
statistics for the Rogue Scholar</a></figcaption></figure><h3 id=\"usage-statistics\">Usage
|
242
|
+
statistics</h3><p>The Usage Stats for the Rogue Scholar are publicly available
|
243
|
+
<a href=\"https://plausible.io/rogue-scholar.org\">here</a>. The numbers are
|
244
|
+
still small and don''t cover individual posts, or usage numbers from the blog
|
245
|
+
itself, both of which may come over time. The Rogue Scholar intentionally
|
246
|
+
isn''t collecting any personal information or using any cookies, but the available
|
247
|
+
public information can give important insights (e.g. the countries or referer
|
248
|
+
pages where users come from).</p><figure class=\"kg-card kg-image-card kg-card-hascaption\"><img
|
249
|
+
src=\"https://blog.front-matter.io/content/images/2023/04/Bildschirmfoto-2023-04-11-um-13.18.09.png\"
|
250
|
+
class=\"kg-image\" alt loading=\"lazy\" width=\"2000\" height=\"1146\" srcset=\"https://blog.front-matter.io/content/images/size/w600/2023/04/Bildschirmfoto-2023-04-11-um-13.18.09.png
|
251
|
+
600w, https://blog.front-matter.io/content/images/size/w1000/2023/04/Bildschirmfoto-2023-04-11-um-13.18.09.png
|
252
|
+
1000w, https://blog.front-matter.io/content/images/size/w1600/2023/04/Bildschirmfoto-2023-04-11-um-13.18.09.png
|
253
|
+
1600w, https://blog.front-matter.io/content/images/2023/04/Bildschirmfoto-2023-04-11-um-13.18.09.png
|
254
|
+
2038w\" sizes=\"(min-width: 720px) 720px\"><figcaption><a href=\"https://plausible.io/rogue-scholar.org\">Daily
|
255
|
+
traffic to the Rogue Scholar</a></figcaption></figure> ","tags":["Feature"],"language":"en"},{"id":"https://doi.org/10.53731/br4gac1-1k9ptea","short_id":"1jdk0oe5","url":"https://blog.front-matter.io/posts/talking-about-talbot/","title":"Talking
|
256
|
+
about Talbot","summary":"Talbot is a Python package I started working on at
|
257
|
+
the end of 2022 and plan to release to the Python Package Index (PyPi) in
|
258
|
+
March. Talbot converts scholarly metadata in various formats, including Crossref,...","date_published":"2023-02-13T19:19:08Z","date_modified":"2023-02-13T19:20:04Z","authors":[{"url":"https://orcid.org/0000-0003-1419-2405","name":"Martin
|
259
|
+
Fenner"}],"image":"https://blog.front-matter.io/content/images/2023/02/TalbotHound_Talbot_Shrewsbury_Book_1445.png","content_html":"
|
260
|
+
<p><img src=\"https://blog.front-matter.io/content/images/2023/02/TalbotHound_Talbot_Shrewsbury_Book_1445.png\"></p><p><a
|
261
|
+
href=\"https://github.com/front-matter/talbot\">Talbot</a> is a Python package
|
262
|
+
I started working on at the end of 2022 and plan to release to the Python
|
263
|
+
Package Index (<a href=\"https://pypi.org/\">PyPi</a>) in March. Talbot converts
|
264
|
+
scholarly metadata in various formats, including Crossref, DataCite, Schema.org,
|
265
|
+
BibTeX, RIS, and formatted citations – the complete list of supported formats
|
266
|
+
is <a href=\"https://docs.front-matter.io/talbot#supported-metadata-formats\">here</a>.
|
267
|
+
Talbot is a Python version of the <a href=\"https://github.com/datacite/bolognese\">Bolognese
|
268
|
+
Ruby gem</a> that I worked on with my DataCite colleagues starting in 2018.
|
269
|
+
After leaving DataCite in 2021 I <a href=\"https://doi.org/10.53731/rdv0jyq-vpb7a9j-zwqzg\">wrote
|
270
|
+
a fork called Briard</a> that added important metadata conversions, namely
|
271
|
+
writing Crossref XML for DOI registration and reading/writing Citation File
|
272
|
+
Format (<a href=\"https://citation-file-format.github.io/\">CFF</a>) for software
|
273
|
+
metadata.</p><p>Talbot, Bolognese, and Briard are all names for dog breeds,
|
274
|
+
the naming convention I have used for most of the Open Source software I have
|
275
|
+
written since releasing the Open Source software <a href=\"https://github.com/lagotto/lagotto\">Lagotto</a>
|
276
|
+
for tracking article-level metrics in 2012.</p><p>My two main use cases for
|
277
|
+
Talbot (and Bolognese) are <a href=\"https://citation.crosscite.org/docs.html\">DOI
|
278
|
+
content negotiation</a>, using DOI metadata to generate metadata in other
|
279
|
+
formats such as BibTeX or as formatted citation in one of the thousands of
|
280
|
+
available citation styles. The Python version will enhance the <a href=\"https://inveniordm.docs.cern.ch/\">InvenioRDM</a>
|
281
|
+
Open Source repository platform, e.g. adding RIS and Schema.org JSON-LD to
|
282
|
+
the supported export formats. The other main use case is supporting DOI registration
|
283
|
+
via multiple input formats. Since 2021 the Briard gem for example allows me
|
284
|
+
to register DOIs for this blog as well as the <a href=\"https://upstream.force11.org/\">Force11
|
285
|
+
Upstream blog</a> using metadata in Schema.org format. With Talbot I want
|
286
|
+
to enable Crossref DOI registration in the InvenioRDM platform for use cases
|
287
|
+
where this makes sense, e.g blog posts or preprints. Talbot will help register
|
288
|
+
DOIs from RSS feeds as part of <a href=\"https://rogue-scholar.org/\">the
|
289
|
+
Rogue Scholar </a>blog archive I am launching in Q2 2023. </p><p>One lesson
|
290
|
+
learned with Bolognese/Briard is that the platform/language matters. The InvenioRDM
|
291
|
+
backend is written in Python (the Frontend is in Javascript/React). And while
|
292
|
+
Bolognese/Briard can be used via the command line or in environments such
|
293
|
+
as GitHub Actions that use Docker-based microservices where the language doesn''t
|
294
|
+
really matter, having the scholarly metadata conversion available in a Python
|
295
|
+
environment makes a huge difference. So I took the plunge of rewriting a fairly
|
296
|
+
complex library in another language. I am fully aware that there are more
|
297
|
+
languages used for writing scholarly infrastructure code, but for the next
|
298
|
+
few years, Python addresses my needs and is hopefully useful to other infrastructure
|
299
|
+
projects.</p><p>While the overall architecture for the evolving Talbot library
|
300
|
+
looks rather similar to Briard, I am making some changes based on my experience
|
301
|
+
over the last five years of working on generic scholarly metadata conversions:</p><ul><li><strong>JSON
|
302
|
+
is the core serialization format</strong>. Metadata in XML format (e.g. DataCite,
|
303
|
+
Crossref, JATS) are important, but no longer used internally for Talbot validation.
|
304
|
+
I will instead migrate to JSON schema for metadata validations in Talbot.
|
305
|
+
DataCite, Crossref, and InvenioRDM use Elasticsearch/OpenSearch and thus JSON
|
306
|
+
to index metadata. DataCite XML is still widely used but deprecated for several
|
307
|
+
years, as on submission the XML is converted to JSON internally.</li><li><strong>Type
|
308
|
+
hints. </strong>Support for static typing is a trend in dynamic languages
|
309
|
+
Javascript (where Typescript is very popular), Ruby (since Ruby 3.0), and
|
310
|
+
also Python. Talbot uses type hints for linting and that helps with error
|
311
|
+
checking.</li><li><strong>Support unstructured references</strong>. Before
|
312
|
+
DataCite Metadata Schema 4.4 (released in April 2021), only references providing
|
313
|
+
an identifier such as a DOI were supported. Crossref has always supported
|
314
|
+
unstructured references, and an identifier isn''t available unless content
|
315
|
+
exists in digital form. In the first Talbot release, I take the \"fallback
|
316
|
+
solution\" approach, providing unstructured metadata if a DOI or other persistent
|
317
|
+
identifier for a reference doesn''t exist.</li><li><strong>Author names are
|
318
|
+
hard</strong>. One of the biggest challenges with scholarly metadata is author
|
319
|
+
names. In formatted citations and BibTeX separate given and family names are
|
320
|
+
important, and a single name field for both given and family names is a constant
|
321
|
+
source of errors and frustrations. In Talbot I follow both Crossref and Citeproc
|
322
|
+
JSON metadata in that you need either a single name or separate given and
|
323
|
+
family names.</li><li><strong>Dates are hard</strong>. Dates are surprisingly
|
324
|
+
hard in scholarly metadata. There are multiple kinds of dates not always used
|
325
|
+
consistently, and incomplete dates such as year-only are very common. One
|
326
|
+
approach to dealing with incomplete dates is encoding the parts year, month,
|
327
|
+
and day separately, used by Citeproc JSON and Crossref in their REST API.
|
328
|
+
The better solution is to use the ISO8601 standard that supports incomplete
|
329
|
+
dates. Other challenges are approximate dates (e.g. <em>circa 1650</em>) and
|
330
|
+
date ranges. These kinds of dates are supported via the Extended Date and
|
331
|
+
Time Format (<a href=\"https://www.loc.gov/standards/datetime/\">EDTF</a>),
|
332
|
+
but working with EDTF is hard in code.</li><li><strong>Idiosyncrasies and
|
333
|
+
inconsistencies</strong>. There is always a balancing act between supporting
|
334
|
+
a metadata standard thoughtfully and not getting lost in edge cases. DataCite
|
335
|
+
metadata (via Dublin Core on which it is based) makes it hard to work with
|
336
|
+
some of the bibliographic metadata common for books, articles, and other textual
|
337
|
+
resources. For example page numbers or the journal name. Crossref metadata
|
338
|
+
has the tendency to treat things differently depending on the content type,
|
339
|
+
e.g. the ISSN. After working on Bolognese for five years I will make some
|
340
|
+
changes to how to best support metadata across different formats. It is clear
|
341
|
+
that there is no single overarching scholarly metadata format, the internal
|
342
|
+
format used by Bolognese, Briard, and now Talbot is a pragmatic mix of the
|
343
|
+
different implementations.</li></ul> ","tags":["Feature"],"language":"en"},{"id":"https://doi.org/10.53731/cp7apdj-jk5f471","short_id":"56gl49d9","url":"https://blog.front-matter.io/posts/announcing-commonmeta/","title":"Announcing
|
344
|
+
Commonmeta","summary":"This week I launched Commonmeta, a new scholarly metadata
|
345
|
+
standard described at https://commonmeta.org. Commonmeta is the result of
|
346
|
+
working on conversion tools for scholarly metadata for many years. One...","date_published":"2023-03-09T17:36:44Z","date_modified":"2023-03-09T17:36:44Z","authors":[{"url":"https://orcid.org/0000-0003-1419-2405","name":"Martin
|
347
|
+
Fenner"}],"image":"https://blog.front-matter.io/content/images/2023/03/standards_2x.png","content_html":"
|
348
|
+
<p><img src=\"https://blog.front-matter.io/content/images/2023/03/standards_2x.png\"></p><p>This
|
349
|
+
week I launched <strong>Commonmeta</strong>, a new scholarly metadata standard
|
350
|
+
described at <a href=\"https://commonmeta.org/\">https://commonmeta.org</a>.
|
351
|
+
Commonmeta is the result of working on conversion tools for scholarly metadata
|
352
|
+
for many years. One conclusion early on was that these conversions are many-to-many,
|
353
|
+
so it becomes much easier to have an internal format that is the intermediate
|
354
|
+
step for these conversions.</p><p>Commonmeta is inspired by two initiatives:
|
355
|
+
<a href=\"https://codemeta.github.io/\">Codemeta</a> and <a href=\"https://commonmark.org\">Commonmark</a>.
|
356
|
+
CodeMeta contributors are creating a minimal metadata schema for science software
|
357
|
+
and code, in JSON and XML. The goal of CodeMeta is to create a concept vocabulary
|
358
|
+
that can be used to standardize the exchange of software metadata across repositories
|
359
|
+
and organizations. Commonmark is a strongly defined, highly compatible specification
|
360
|
+
of Markdown, along with a suite of comprehensive tests to validate Markdown
|
361
|
+
implementations against this specification. </p><p>These two specifications
|
362
|
+
not only inspired the name but also the principles of how I want to see Commonmeta
|
363
|
+
operate:</p><ul><li>driven by real-world implementations and not committees</li><li>features
|
364
|
+
that focus on what is common in existing implementations/formats</li><li>a
|
365
|
+
testable specification</li></ul><p>The website goes into a little bit more
|
366
|
+
detail about why I didn''t pick any of the existing standards but instead came
|
367
|
+
up with a new metadata standard. This is a familiar pattern made famous by
|
368
|
+
the XKCD comic shown above.</p><p>As I want this to be driven by real-world
|
369
|
+
implementations and not committees, I also in the last few weeks launched<a
|
370
|
+
href=\"https://github.com/front-matter/commonmeta-py\"> commonmeta-py</a>,
|
371
|
+
a Python implementation of the standard available on <a href=\"https://pypi.org/project/commonmeta-py/\">PyPi</a>.
|
372
|
+
And in a few months, I hope to have tweaked the <a href=\"https://github.com/front-matter/briard\">Ruby
|
373
|
+
Gem</a> that I originally wrote a few years ago to support Commonmeta as the
|
374
|
+
internal format.</p><p>With testable specification, I mean both a JSON Schema
|
375
|
+
to describe Commonmeta and many, many tests that validate the conversions
|
376
|
+
with real-world data. The JSON Schema is available <a href=\"https://commonmeta.org/schema\">here</a>,
|
377
|
+
and will become stable once it reaches version 1.0. commonmeta-py comes with
|
378
|
+
lots of tests, but I hope to further improve the test coverage.</p><p>Please
|
379
|
+
reach out to me if you want to help with Commonmeta, in particular, work on
|
380
|
+
implementations in other languages, such as Javascript, PHP, or Java.</p>
|
381
|
+
","tags":["News"],"language":"en"},{"id":"https://doi.org/10.53731/eyf75cj-jsgv26c","short_id":"9memqjg2","url":"https://blog.front-matter.io/posts/building-blocks/","title":"Building
|
382
|
+
Blocks for a Scholarly Blog Archive","summary":"Another follow-up post, extending
|
383
|
+
three earlier posts (see references), on the Scholarly Blog Archive that Front
|
384
|
+
Matter is building and that I plan to launch in the first half of 2023. I
|
385
|
+
have been thinking...","date_published":"2022-12-21T14:23:47Z","date_modified":"2022-12-21T20:57:38Z","authors":[{"url":"https://orcid.org/0000-0003-1419-2405","name":"Martin
|
386
|
+
Fenner"}],"image":"https://blog.front-matter.io/content/images/2022/12/James_Brown_-55208420--1.jpeg","content_html":"
|
387
|
+
<p><img src=\"https://blog.front-matter.io/content/images/2022/12/James_Brown_-55208420--1.jpeg\"></p><p>Another
|
388
|
+
follow-up post, extending three earlier posts (see references), on the Scholarly
|
389
|
+
Blog Archive that Front Matter is building and that I plan to launch in the
|
390
|
+
first half of 2023. I have been thinking about the building blocks that make
|
391
|
+
this blog archive work:</p><h3 id=\"diamond-open-access\">Diamond Open Access</h3><blockquote>Diamond
|
392
|
+
open access (OA) is an open access business model in which no fees are charged
|
393
|
+
to either authors or readers. <a href=\"https://www.dfg.de/en/research_funding/announcements_proposals/2022/info_wissenschaft_22_26/index.html\">German
|
394
|
+
Research Foundation</a></blockquote><p>Using this term sounds strange in the
|
395
|
+
context of scholarly blog posts, but it means that scholarly blog infrastructure
|
396
|
+
should be free to publish and free to read. One challenge with Open Access
|
397
|
+
for publications, particularly in disciplines such as medicine and life sciences
|
398
|
+
where there is a lot of money, is that there are no drivers for driving down
|
399
|
+
cost, and subscription fees have often been converted to article processing
|
400
|
+
charges (APC). And instead of technological advances making scholarly publishing
|
401
|
+
cheaper over time, the costs for authors and readers (and their institutions
|
402
|
+
and funders who ultimately pay for this) are only increasing.</p><p>There
|
403
|
+
is of course already a lot of Diamond Open Access, and infrastructures for
|
404
|
+
research data and research software also typically don''t charge authors or
|
405
|
+
readers. This causes other problems in terms of sustainable scholarly infrastructure
|
406
|
+
and innovation, but I think it is an essential building block for the science
|
407
|
+
blog archive Front Matter is building. A lot of work is needed in 2023 to
|
408
|
+
come up with a strategy for sustaining the Front Matter science blog archive
|
409
|
+
in the long run, all I can say now is that it will not use advertising.</p><h3
|
410
|
+
id=\"creative-commons-license\">Creative Commons License</h3><p>For content
|
411
|
+
that is free to read we need a license that specifies that. The blog archive
|
412
|
+
needs clear conditions for what it can do with the content, and the same is
|
413
|
+
true for downstream users and services. History tells us that licenses should
|
414
|
+
be clear and simple, so for scholarly blog posts I will aim to use the <a
|
415
|
+
href=\"https://creativecommons.org/licenses/by/4.0/legalcode\">Creative Commons
|
416
|
+
Attribution 4.0 License</a> (CC-BY 4.0) for all content. </p><figure class=\"kg-card
|
417
|
+
kg-image-card\"><img src=\"https://blog.front-matter.io/content/images/2022/12/cc-by.png\"
|
418
|
+
class=\"kg-image\" alt loading=\"lazy\" width=\"250\" height=\"88\"></figure><h3
|
419
|
+
id=\"central-blog-archive\">Central Blog Archive</h3><p>As I <a href=\"https://doi.org/10.53731/br9f5xa-a556w2t\">explained
|
420
|
+
in a post last week</a>, a central blog archive for blog content published
|
421
|
+
in many different places makes the most sense for science blog posts – a model
|
422
|
+
also used by <a href=\"https://www.ncbi.nlm.nih.gov/pmc/\">PubMed Central
|
423
|
+
</a>for a free full-text archive of biomedical and life sciences journal articles.
|
424
|
+
The <a href=\"https://inveniordm.docs.cern.ch/\">InvenioRDM</a> Open Source
|
425
|
+
software is a good fit for this use case.</p><figure class=\"kg-card kg-image-card\"><img
|
426
|
+
src=\"https://blog.front-matter.io/content/images/2022/12/Download--4--1.png\"
|
427
|
+
class=\"kg-image\" alt loading=\"lazy\" width=\"372\" height=\"120\"></figure><p>Starting
|
428
|
+
a science blog is straightforward. There are plenty of cheap and free options
|
429
|
+
available from <a href=\"https://wordpress.org/\">Wordpress</a> to <a href=\"https://pages.github.com/\">GitHub
|
430
|
+
Pages</a>. You might run your blog as part of a larger platform, together
|
431
|
+
with collaborators, or all for yourself.</p><h3 id=\"digital-object-identifier-doi-and-metadata\">Digital
|
432
|
+
Object Identifier (DOI) and Metadata</h3><p>DOIs are frequently used as persistent
|
433
|
+
identifiers for scholarly content and are integrated into the InvenioRDM platform.
|
434
|
+
The blog archive can either archive blog posts with DOIs, or it can issue
|
435
|
+
DOIs for existing blogs not using DOIs. In the latter case it is important
|
436
|
+
that the DOI resolves to the original content in the hosting blog platform,
|
437
|
+
and redirects to the blog platform only when the original blog is no longer
|
438
|
+
available. </p><p>DOIs (e.g. from DataCite or Crossref) have a required set
|
439
|
+
of metadata that makes sense for scholarly blogs. Optional metadata that are
|
440
|
+
desired for the blog archive are license (see above), abstract, subject area
|
441
|
+
(using the 43 <a href=\"https://en.wikipedia.org/wiki/Fields_of_Science_and_Technology\">OECD
|
442
|
+
Fields of Science and Technology</a>), keywords, language, and persistent
|
443
|
+
identifiers for the blog (<a href=\"https://www.issn.org/\">ISSN</a>), author
|
444
|
+
(<a href=\"https://orcid.org/\">ORCID</a>) and affiliated institution (<a
|
445
|
+
href=\"https://ror.org/\">ROR</a>).</p><h3 id=\"rich-site-summary-rss\">Rich
|
446
|
+
Site Summary (RSS)</h3><p><a href=\"https://en.wikipedia.org/wiki/RSS\">RSS</a>
|
447
|
+
is the standard protocol for distributing and consuming blog content. It is
|
448
|
+
actually a group of protocols (Atom and multiple flavors of the RSS format),
|
449
|
+
but they have been around for so long that the popular tools and services
|
450
|
+
support the various protocols. RSS will be the standard way how content is
|
451
|
+
ingested by the blog archive, and probably also how in turn content in the
|
452
|
+
central blog archive is consumed, e.g. as an automated feed of all new science
|
453
|
+
blog posts in a particular subject area and language.</p><figure class=\"kg-card
|
454
|
+
kg-image-card\"><img src=\"https://blog.front-matter.io/content/images/2022/12/images--1-.png\"
|
455
|
+
class=\"kg-image\" alt loading=\"lazy\" width=\"128\" height=\"128\"></figure><p>Because
|
456
|
+
RSS is so widely supported, other ways of registering content – e.g. via web
|
457
|
+
form, API, or webhook – are less critical for the blog archive. Work is needed
|
458
|
+
on the InvenioRDM software to add strong support for RSS feeds, but would
|
459
|
+
allow the automation of a lot of the work needed to build and maintain the
|
460
|
+
blog archive.</p><h3 id=\"markdown-and-pdf\">Markdown and PDF</h3><p><a href=\"https://daringfireball.net/projects/markdown/\">Markdown</a>
|
461
|
+
is a markup language popular with many blogging platforms. It is typically
|
462
|
+
used for editing blog posts and other documents in online environments but
|
463
|
+
is not really used for consuming blog content via RSS. Markdown has<a href=\"https://pandoc.org/\">
|
464
|
+
been extended</a> to support features needed for scholarly documents, e.g.
|
465
|
+
tables and references, but the uptake of this added functionality in science
|
466
|
+
blogs has been slow. </p><p>PDF is commonly used for reading scholarly publications.
|
467
|
+
The workflows for submitting manuscripts to journals and preprint archives
|
468
|
+
in PDF format are broken because it is tricky to extract structured documents
|
469
|
+
from PDFs. The blog archive will support PDF as an output format at some point
|
470
|
+
but is not a high priority. Blog posts are typically consumed via blog reader
|
471
|
+
or email (if the blog produces a newsletter) rather than as PDF printed out
|
472
|
+
on paper. There is work needed on the InvenioRDM platform to display full-text
|
473
|
+
content rendered as HTML.</p><h3 id=\"curation-and-community\">Curation and
|
474
|
+
Community</h3><p>Science blog posts typically see a lightweight review workflow
|
475
|
+
before publication, and often receive feedback in the form of comments and/or
|
476
|
+
social media mentions. For the Front Matter science blog archive, I want to
|
477
|
+
keep that approach and not build any hurdles for inclusion. Some level of
|
478
|
+
curation is needed, not only to check for quackery and hate speech but also
|
479
|
+
to improve metadata that help with discovery, and to find blogs that should
|
480
|
+
be included. Ideally we can build a community around the science blog archive,
|
481
|
+
taking advantage of the <a href=\"https://inveniordm.web.cern.ch/communities\">communities</a>
|
482
|
+
(focussing on different languages and subject areas) feature recently added
|
483
|
+
to the InvenioRDM software.</p><h3 id=\"flashback\">Flashback?</h3><p>If reading
|
484
|
+
this post feels like it is 2006 – the year <a href=\"https://en.wikipedia.org/wiki/James_Brown\">James
|
485
|
+
Brown</a> (used for the feature image of this post) died – again with talk
|
486
|
+
about blogs, RSS, Markdown, Creative Commons, and related technologies (I
|
487
|
+
for example didn''t mention Zotero, XML, or Wordpress), you are right. This
|
488
|
+
is intentional, these technologies are not as sexy as using artificial intelligence
|
489
|
+
or cryptocurrencies to drive this, but I want the Science Blog archive to
|
490
|
+
become a scholarly resource that is useful, open, and inclusive.</p><h3 id=\"references\">References</h3><p>Fenner,
|
491
|
+
M. (2022, September 28). Starting Work on the Front Matter Archive. <em>Front
|
492
|
+
Matter</em>. <a href=\"https://doi.org/10.53731/9z6rz5d-djbay0y\">https://doi.org/10.53731/9z6rz5d-djbay0y</a></p><p>Fenner,
|
493
|
+
M. (2022, December 12). Building an archive for scholarly blog posts. <em>Front
|
494
|
+
Matter</em>. <a href=\"https://doi.org/10.53731/br9f5xa-a556w2t\">https://doi.org/10.53731/br9f5xa-a556w2t</a></p><p>Fenner,
|
495
|
+
M. (2022, December 19). Launching the Front Matter Roadmap. <em>Front Matter</em>.
|
496
|
+
<a href=\"https://doi.org/10.53731/cbdtfp1-1798beh\">https://doi.org/10.53731/cbdtfp1-1798beh</a></p><p>Fenner,
|
497
|
+
M. (2010, October 6). Beyond the PDF – it is time for a workshop. <em>Front
|
498
|
+
Matter</em>. <a href=\"https://doi.org/10.53731/r294649-6f79289-8cw7z\">https://doi.org/10.53731/r294649-6f79289-8cw7z</a></p><p>Fenner,
|
499
|
+
M. (2013, June 19). Citations in Scholarly Markdown. <em>Front Matter</em>.
|
500
|
+
<a href=\"https://doi.org/10.53731/r294649-6f79289-8cw1b\">https://doi.org/10.53731/r294649-6f79289-8cw1b</a></p>
|
501
|
+
","tags":["Feature"],"language":"en"},{"id":"https://doi.org/10.53731/avg2ykg-gdxppcd","short_id":"j3ejvvep","url":"https://blog.front-matter.io/posts/need-to-fix-science-blogs/","title":"Do
|
502
|
+
we need to fix science blogs?","summary":"Science blogs have been around for
|
503
|
+
at least 20 years and have become an important part of science communication.
|
504
|
+
So are there any fundamental issues that need fixing?Barriers to EntryBlogging
|
505
|
+
platforms are...","date_published":"2023-01-25T15:14:17Z","date_modified":"2023-02-01T15:43:22Z","authors":[{"url":"https://orcid.org/0000-0003-1419-2405","name":"Martin
|
506
|
+
Fenner"}],"image":"https://images.unsplash.com/photo-1585838017777-5003198884b5?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDMyfHxicm9rZW58ZW58MHx8fHwxNjc0NjUyMTEy&ixlib=rb-4.0.3&q=80&w=2000","content_html":"
|
507
|
+
<p><img src=\"https://images.unsplash.com/photo-1585838017777-5003198884b5?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDMyfHxicm9rZW58ZW58MHx8fHwxNjc0NjUyMTEy&ixlib=rb-4.0.3&q=80&w=2000\"></p><p>Science
|
508
|
+
blogs have been around for at least 20 years and have become an important
|
509
|
+
part of science communication. So are there any fundamental issues that need
|
510
|
+
fixing?</p><h3 id=\"barriers-to-entry\">Barriers to Entry</h3><p>Blogging
|
511
|
+
platforms are mature at this point, and the technology is not imposing barriers
|
512
|
+
to entry for most people. The user experience has greatly improved over the
|
513
|
+
last few years and there are a number of affordable ways for hosting a blog
|
514
|
+
that also work for science blogs, including free options such as <a href=\"https://pages.github.com/\">GitHub
|
515
|
+
Pages</a>.</p><h3 id=\"open-access\">Open Access</h3><p>Science blogs have
|
516
|
+
traditionally been free to read, but there is a general trend towards subscriptions
|
517
|
+
for blogs (and related newsletters), as the advertising business model isn''t
|
518
|
+
really working for niche content such as most science. How to sustain science
|
519
|
+
blogging in the long run is an unresolved question, and charging authors (beyond
|
520
|
+
a nominal hosting fee) doesn''t look like a path forward. Luckily the costs
|
521
|
+
of publishing science blogs are very reasonable compared to journal publishing
|
522
|
+
or hosting research data and code.</p><h3 id=\"missing-functionality\">Missing
|
523
|
+
Functionality</h3><p>The basic functionality of formatted text with embedded
|
524
|
+
figures and links is supported by many blogging platforms. The requirements
|
525
|
+
of data-intensive science, e.g. interactive visualizations, can be a challenge,
|
526
|
+
but that is also true for publishing journal articles. Interactive environments
|
527
|
+
such as <a href=\"https://jupyter.org/\">Jupyter Notebooks</a> might be a
|
528
|
+
better fit for these use cases. </p><p>Reference management is probably the
|
529
|
+
biggest gap in science blogging, as handling more than a few references in
|
530
|
+
standard ways is not easily done by hand.</p><h3 id=\"impact-or-credit\">Impact
|
531
|
+
or Credit</h3><p>Unfortunately a lot of the activities of scholars are driven
|
532
|
+
by perceived <em>Impact </em>or<em> Credit</em>, and science blogs typically
|
533
|
+
don''t score high in this regard – with the exception of some disciplines
|
534
|
+
such as mathematics. There is probably no short-term solution, and I am not
|
535
|
+
even sure it is a problem that needs fixing. </p><p>The long-term solution
|
536
|
+
should focus on increasing the visibility and thus discoverability of science
|
537
|
+
blogs to reach a larger audience. As I discussed in a <a href=\"https://doi.org/10.53731/br9f5xa-a556w2t\">previous
|
538
|
+
post</a>, my preferred approach is a central repository for science blog content
|
539
|
+
originally published in many different locations (the PubMed/PubMed Central
|
540
|
+
model).</p><h3 id=\"persistence\">Persistence</h3><p>This leaves persistence
|
541
|
+
as the other main problem with science blogs besides discoverability that
|
542
|
+
needs fixing. Link rot (the resource identified by a URI vanishes from the
|
543
|
+
web) and content drift (the resource identified by a URI changes over time)
|
544
|
+
are <a href=\"https://ceur-ws.org/Vol-3246/10_Paper3.pdf\">well-known problems
|
545
|
+
with digital content</a>, from <a href=\"https://www.theverge.com/2021/5/21/22447690/link-rot-research-new-york-times-domain-hijacking\">newspapers</a>
|
546
|
+
to scholarly content. There are mainly two approaches to address this problem:</p><ul><li><strong>Archiving</strong>
|
547
|
+
using generic services such as the <a href=\"https://archive.org/\">Internet
|
548
|
+
Archive</a> and specialized services such as <a href=\"https://www.softwareheritage.org/\">Software
|
549
|
+
Heritage</a> for software source code or <a href=\"https://www.portico.org/\">Portico</a>
|
550
|
+
for scholarly content.</li><li><strong>Persistent Identifiers</strong> by
|
551
|
+
maintaining links independent of URL host and path, both of which may change
|
552
|
+
over time. This <a href=\"https://doi.org/10.53731/r294649-6f79289-8cw1h\">blog
|
553
|
+
post of mine</a> is almost 14 years old, and the URL has changed at least
|
554
|
+
four times as I changed blogging platforms. Since 2021 the post has had a
|
555
|
+
persistent identifier in form of a DOI, and that DOI will not change going
|
556
|
+
forward, eventually pointing to an archive when I retire.</li></ul><p>Some
|
557
|
+
science blog content is ephemeral and may not be worth archiving, but a lot
|
558
|
+
of content is still worth reading years later (the <a href=\"https://doi.org/10.53731/r294649-6f79289-8cw1q\">first
|
559
|
+
post of this blog</a> is more than 15 years old), even if only to provide
|
560
|
+
historical context.</p><h3 id=\"conclusions\">Conclusions</h3><p>In summary,
|
561
|
+
we don''t need to <em>fix everything</em> with science blogs but rather focus
|
562
|
+
on two aspects: discoverability and persistence. In doing that we also need
|
563
|
+
to sort out better sustainability for science blogs, and as an added bonus
|
564
|
+
improve their reference management.</p><p>Discoverability and persistence
|
565
|
+
are an issue for all science blogs, and we are trying to fix them by launching
|
566
|
+
the <a href=\"https://rogue-scholar.org/\">Rogue Scholar</a> in the second
|
567
|
+
quarter of 2023. If you are managing a science blog and care about discoverability
|
568
|
+
and persistence, sign up for the <a href=\"https://rogue-scholar.org/\">Rogue
|
569
|
+
Scholar waitlist</a>. Particularly if your blog is no longer actively maintained,
|
570
|
+
for example blogs hosted by grant-funded projects that have ended or are ending
|
571
|
+
soon.</p><p>Today I launched the <a href=\"https://docs.rogue-scholar.org\">Rogue
|
572
|
+
Scholar Documentation</a> site, where I will document how to use the Rogue
|
573
|
+
Scholar, e.g. what you can do to prepare your science blog for Rogue Scholar
|
574
|
+
archiving. The site is written in markdown and hosted on GitHub, so feel free
|
575
|
+
to ask questions or suggest additions via the links provided by the documentation
|
576
|
+
site.</p> ","tags":["Feature"],"language":"en"},{"id":"https://doi.org/10.53731/n7vvs-h6995","short_id":"zkevm5e3","url":"https://blog.front-matter.io/posts/rogue-scholar-releases-first-catalog/","title":"The
|
577
|
+
Rogue Scholar releases its first catalog of science blogs","summary":"The
|
578
|
+
Rogue Scholar blog archive today released its first catalog of science blogs,
|
579
|
+
a total of nineteen science blogs that signed up for the Rogue Scholar via
|
580
|
+
submission form and met the inclusion criteria: The...","date_published":"2023-03-29T20:46:54Z","date_modified":"2023-04-04T09:22:41Z","authors":[{"url":"https://orcid.org/0000-0003-1419-2405","name":"Martin
|
581
|
+
Fenner"}],"image":"https://images.unsplash.com/photo-1662582632158-7f0f6e9a617b?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDMzfHxjYXRhbG9nfGVufDB8fHx8MTY4MDEyMTQ2MQ&ixlib=rb-4.0.3&q=80&w=2000","content_html":"
|
582
|
+
<p><img src=\"https://images.unsplash.com/photo-1662582632158-7f0f6e9a617b?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDMzfHxjYXRhbG9nfGVufDB8fHx8MTY4MDEyMTQ2MQ&ixlib=rb-4.0.3&q=80&w=2000\"></p><p>The
|
583
|
+
Rogue Scholar blog archive today released its <a href=\"https://rogue-scholar.org/blogs\">first
|
584
|
+
catalog of science blogs</a>, a total of nineteen science blogs that signed
|
585
|
+
up for the Rogue Scholar via <a href=\"https://jvinjjenjik.typeform.com/to/uxgAsHPl\">submission
|
586
|
+
form</a> and met the inclusion criteria: </p><ul><li>The blog is about science
|
587
|
+
and in English or German (more languages will follow later, reach out to me
|
588
|
+
if you can help).</li><li>The full-text content is available via RSS feed
|
589
|
+
and distributed using a Creative Commons Attribution license (<a href=\"https://creativecommons.org/licenses/by/4.0/legalcode\">CC-BY</a>).</li></ul><p>The
|
590
|
+
Rogue Scholar will launch in the second quarter of this year, and this list
|
591
|
+
of science blogs is an important step. The RSS feeds of the included blogs
|
592
|
+
will be used to archive content and register DOIs, and they contain important
|
593
|
+
information that I will include over time, including license, language, blog
|
594
|
+
description, blog logo, contact person, and blogging platform.</p><figure
|
595
|
+
class=\"kg-card kg-image-card kg-card-hascaption\"><img src=\"https://blog.front-matter.io/content/images/2023/03/Bildschirmfoto-2023-03-29-um-22.38.08.png\"
|
596
|
+
class=\"kg-image\" alt loading=\"lazy\" width=\"2000\" height=\"841\" srcset=\"https://blog.front-matter.io/content/images/size/w600/2023/03/Bildschirmfoto-2023-03-29-um-22.38.08.png
|
597
|
+
600w, https://blog.front-matter.io/content/images/size/w1000/2023/03/Bildschirmfoto-2023-03-29-um-22.38.08.png
|
598
|
+
1000w, https://blog.front-matter.io/content/images/size/w1600/2023/03/Bildschirmfoto-2023-03-29-um-22.38.08.png
|
599
|
+
1600w, https://blog.front-matter.io/content/images/size/w2400/2023/03/Bildschirmfoto-2023-03-29-um-22.38.08.png
|
600
|
+
2400w\" sizes=\"(min-width: 720px) 720px\"><figcaption>Subset of the blogs
|
601
|
+
included in the first <a href=\"https://rogue-scholar.org/blogs\">Rogue Scholar
|
602
|
+
catalog</a></figcaption></figure><p>The first Rogue Scholar catalog can be
|
603
|
+
used as a starting point to find interesting science blogs, but more importantly,
|
604
|
+
the catalog is available as an <a href=\"https://doi.org/10.53731/wa7k5-v4t16\">OPML
|
605
|
+
file</a> for download and can be imported (and modified) into any blog reader.</p>
|
606
|
+
","tags":["News"],"language":"en"},{"id":"https://doi.org/10.53731/d6vdvbt-tffmezj","short_id":"5ldw65eo","url":"https://blog.front-matter.io/posts/rss-atom-jsonfeed/","title":"RSS,
|
607
|
+
Atom, JSON Feed","summary":"As I discussed in a recent post, RSS is an essential
|
608
|
+
building block for the upcoming Rogue Scholar Scholarly Blog Archive. RSS
|
609
|
+
makes it easy to import blog posts (both metadata and content) automatically
|
610
|
+
and...","date_published":"2023-01-16T16:57:54Z","date_modified":"2023-01-16T17:06:53Z","authors":[{"url":"https://orcid.org/0000-0003-1419-2405","name":"Martin
|
611
|
+
Fenner"}],"image":"https://images.unsplash.com/photo-1597092451116-27787c07901d?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDJ8fGFyY2hpdmV8ZW58MHx8fHwxNjczODg2NDI2&ixlib=rb-4.0.3&q=80&w=2000","content_html":"
|
612
|
+
<p><img src=\"https://images.unsplash.com/photo-1597092451116-27787c07901d?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDJ8fGFyY2hpdmV8ZW58MHx8fHwxNjczODg2NDI2&ixlib=rb-4.0.3&q=80&w=2000\"></p><p>As
|
613
|
+
I discussed in a <a href=\"https://doi.org/10.53731/eyf75cj-jsgv26c\">recent
|
614
|
+
post</a>, RSS is an essential building block for the upcoming <a href=\"https://rogue-scholar.org\">Rogue
|
615
|
+
Scholar</a> Scholarly Blog Archive. RSS makes it easy to import blog posts
|
616
|
+
(both metadata and content) automatically and is supported by all blogging
|
617
|
+
platforms. This kind of automation is critical to keep the costs of running
|
618
|
+
the Rogue Scholar low, allowing it to scale to cover a substantial number
|
619
|
+
of science blog posts, and hopefully becoming an important Open Science resource.</p><p>But
|
620
|
+
there are also challenges with using RSS:</p><ul><li>RSS is not a single standard
|
621
|
+
but comes in multiple flavors: multiple versions of RSS, Atom, and the newer
|
622
|
+
<a href=\"https://www.jsonfeed.org/\">JSON Feed</a>. Most libraries for consuming
|
623
|
+
RSS (e.g. the Python <a href=\"https://github.com/kurtmckee/feedparser\">feedparser</a>)
|
624
|
+
can handle RSS and Atom, and fewer tools (e.g. the Python <a href=\"https://pypi.org/project/reader/\">reader</a>)
|
625
|
+
also support the newer JSON Feed.</li><li>The Rogue Scholar will use the <a
|
626
|
+
href=\"https://inveniordm.docs.cern.ch/\">InvenioRDM</a> open source platform,
|
627
|
+
which uses <a href=\"https://opensearch.org/\">OpenSearch</a> to index content
|
628
|
+
and metadata. OpenSearch – just like Elasticsearch on which it is based –
|
629
|
+
works with JSON. Indexing and archiving science blogs therefore should first
|
630
|
+
convert RSS and Atom feeds into JSON, and JSON Feed, <a href=\"https://www.jsonfeed.org/mappingrssandatom/\">which
|
631
|
+
has been mapped from RSS and Atom</a>, is the obvious choice.</li><li>Some
|
632
|
+
blogs prefer to only publish summaries in their RSS feeds, there have been
|
633
|
+
many discussions on this topic over the years. It would complicate the operation
|
634
|
+
of the Rogue Scholar if full-text content has to be retrieved by other means,
|
635
|
+
and archiving full-text content is the primary goal for the Rogue Scholar.
|
636
|
+
The Rogue Scholar needs one feed that provides the full-text content, it doesn''t
|
637
|
+
have to be the default blog feed.</li><li>Blogs, in particular personal blogs,
|
638
|
+
may publish content that is out of the scope of the main science topics of
|
639
|
+
the blog. Occasional out-of-scope posts, e.g. talking about major events such
|
640
|
+
as job changes, sickness, or travel, are probably ok, and add a personal note.
|
641
|
+
If this is frequently the case, and this has come up twice in initial Rogue
|
642
|
+
Scholar discussions, it probably makes sense to provide a filtered RSS feed
|
643
|
+
(e.g. using tags) with only a subset of posts.</li><li>Describing a blog and
|
644
|
+
associated metadata (e.g. name, feed URL, language, license, contact) is not
|
645
|
+
something that easily maps how InvenioRDM is modeled. The obvious choice would
|
646
|
+
be <a href=\"https://inveniordm.web.cern.ch/communities\">communities</a>,
|
647
|
+
but they can also be seen as a higher level of aggregation, e.g. all blog
|
648
|
+
posts about biodiversity independent of the blog source. For now I will work
|
649
|
+
with communities and enhance the InvenioRDM functionality where it also makes
|
650
|
+
sense for other InvenioRDM use cases, of course coordinating with the InvenioRDM
|
651
|
+
community.</li></ul><p>Two weeks ago I opened up the <a href=\"https://jvinjjenjik.typeform.com/to/uxgAsHPl\">waitlist</a>
|
652
|
+
for the Rogue Scholar, and I am happy with the feedback I have received so
|
653
|
+
far: sixteen submissions and a number of encouraging discussions. Consider
|
654
|
+
adding your science blog to the waitlist, or learn more at the <a href=\"https://rogue-scholar.org\">Rogue
|
655
|
+
Scholar</a> website. If you have questions, post them in the comments or join
|
656
|
+
the <a href=\"https://discord.gg/wZcqPt4p\">Discord channel </a>(renamed from
|
657
|
+
Front Matter to Rogue Scholar).</p><p>It has not escaped our notice that the
|
658
|
+
specific use of RSS we have postulated immediately suggests a possible mechanism
|
659
|
+
for the archiving and DOI registration of other scholarly content.</p> ","tags":["Feature"],"language":"en"},{"id":"https://doi.org/10.53731/88drdpz-znvdjr9","short_id":"qlgxvqdm","url":"https://blog.front-matter.io/posts/launching-the-front-matter-gazette/","title":"Launching
|
660
|
+
the Front Matter Gazette","summary":"On Wednesday this week I am launching
|
661
|
+
the Front Matter Gazette, a weekly newsletter that highlights exciting science
|
662
|
+
stories from around the web. The linked content highlighted in the newsletter
|
663
|
+
is published...","date_published":"2023-01-30T12:48:26Z","date_modified":"2023-01-30T12:48:26Z","authors":[{"url":"https://orcid.org/0000-0003-1419-2405","name":"Martin
|
664
|
+
Fenner"}],"image":"https://images.unsplash.com/photo-1521134976835-9963f2185519?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDE2fHxqb3VybmFsfGVufDB8fHx8MTY3NTAxMzMwNA&ixlib=rb-4.0.3&q=80&w=2000","content_html":"
|
665
|
+
<p><img src=\"https://images.unsplash.com/photo-1521134976835-9963f2185519?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDE2fHxqb3VybmFsfGVufDB8fHx8MTY3NTAxMzMwNA&ixlib=rb-4.0.3&q=80&w=2000\"></p><p>On
|
666
|
+
Wednesday this week I am launching the <em>Front Matter Gazette</em>, a weekly
|
667
|
+
newsletter that highlights exciting science stories from around the web. The
|
668
|
+
linked content highlighted in the newsletter is published elsewhere and is
|
669
|
+
free to read whenever possible. The newsletter requires a paid subscription
|
670
|
+
(<a href=\"https://blog.front-matter.io/#/portal/signup\">available here</a>),
|
671
|
+
5 €/month or 50 €/year with a thirty-day free trial and free subscriptions
|
672
|
+
on request. The subscription fees help pay for the curation effort – finding
|
673
|
+
and summarizing the most exciting science stories. </p><h3 id=\"why-do-we-need-to-highlight-the-most-interesting-science\">Why
|
674
|
+
do we need to highlight the most interesting science?</h3><p>With the <em>Front
|
675
|
+
Matter Gazette,</em> I try a new approach to addressing an old problem: information
|
676
|
+
overload.</p><figure class=\"kg-card kg-embed-card kg-card-hascaption\"><iframe
|
677
|
+
width=\"200\" height=\"150\" src=\"https://www.youtube.com/embed/LabqeJEOQyI?feature=oembed\"
|
678
|
+
frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media;
|
679
|
+
gyroscope; picture-in-picture; web-share\" allowfullscreen title=\"Web 2.0
|
680
|
+
Expo NY: Clay Shirky (shirky.com) It''s Not Information Overload. It''s Filter
|
681
|
+
Failure.\"></iframe><figcaption>Web 2.0 Expo NY: Clay Shirky (shirky.com)
|
682
|
+
It''s Not Information Overload. It''s Filter Failure.</figcaption></figure><p>The
|
683
|
+
approach traditionally often used in science has been to use journals as a
|
684
|
+
filter. There are many reasons why this approach has failed, described for
|
685
|
+
example in <a href=\"https://asapbio.org/addressing-information-overload-in-scholarly-literature\">this
|
686
|
+
2021 post on the ASAPbio blog</a> by Christine Ferguson and me. Three important
|
687
|
+
limitations are:</p><ul><li><strong>Delays</strong>. The time from submission
|
688
|
+
to publication for peer-reviewed journal articles can be significant, which
|
689
|
+
causes critical issues in situations that need quick actions based on science
|
690
|
+
such as in the COVID pandemic, but also for early career researchers.</li><li><strong>Focus
|
691
|
+
on the journal article</strong>. Journal articles are the main channel of
|
692
|
+
scientific communication in many disciplines, but large parts of scholarship
|
693
|
+
focus on something else, for example, conference proceedings in computer science
|
694
|
+
or books in the humanities. In addition, newer outputs of scholarship such
|
695
|
+
as research data or software source code are left out or only captured <em>by
|
696
|
+
proxy</em>, publishing journals with articles describing software or data.</li><li><strong>Not
|
697
|
+
Open Science</strong>. Leaving the decision to what is important in science
|
698
|
+
to journal publishers, often commercial, instead of the scientists themselves,
|
699
|
+
is the wrong choice as other interests interfere, and marginalized communities
|
700
|
+
and regions are left out not only of science publishing but also of what science
|
701
|
+
is highlighted and promoted.</li></ul><p>Two alternative approaches to journals
|
702
|
+
as a filter are <strong>automation</strong> and <strong>curation</strong>.
|
703
|
+
In the ASAPbio blog post mentioned earlier, Christine and I discussed an automation
|
704
|
+
approach we tried out in 2021, filtering relevant biomedical preprints by
|
705
|
+
the attention they received on Twitter immediately after publication. We have
|
706
|
+
not continued this activity beyond early 2022 for two reasons: a) I spent
|
707
|
+
the first <a href=\"https://doi.org/10.53731/bkkzj8g-gd14mb6\">five months
|
708
|
+
of 2022 in the hospital</a>, and b) in November 2022 I left Twitter and moved
|
709
|
+
to <a href=\"https://hachyderm.io/@mfenner\">Mastodon</a> after the change
|
710
|
+
in Twitter ownership.</p><p>There are many initiatives in this space that
|
711
|
+
try to use computer algorithms to find the most relevant scholarly content,
|
712
|
+
but Christine and I felt that this was only the first step and that <strong>curation</strong>
|
713
|
+
was key to finding what is interesting and relevant. Curation is what journal
|
714
|
+
editors have always done, and what is helped with peer review since it became
|
715
|
+
increasingly required in the 1960s, but when curation is used to find what
|
716
|
+
is interesting and relevant, and not what should be published, there is no
|
717
|
+
longer a need to leave the curation exclusively up to journals.</p><p>An Open
|
718
|
+
Science approach to curation has many elements, but a newsletter feels like
|
719
|
+
a good fit. It is a low-tech approach that works even for the busiest scientists,
|
720
|
+
and it can be combined with the automation approaches discussed earlier. And
|
721
|
+
curated newsletters about Science and Scholarship work with preprints, research
|
722
|
+
data, source code, and other forms of scholarship. A related activity, no
|
723
|
+
longer so low-tech, is science podcasts, which arguably are currently more
|
724
|
+
popular than science newsletters.</p><h3 id=\"and-who-is-going-to-pay-for-this\">And
|
725
|
+
who is going to pay for this?</h3><p>There are two elephants in the room for
|
726
|
+
paying for this activity: advertising and grant funding. Advertising is not
|
727
|
+
only a frustrating experience for readers and authors, but also doesn''t really
|
728
|
+
work in a niche market such as science. The current issues at the German <a
|
729
|
+
href=\"https://scienceblogs.de/\">scienceblogs.de</a> are only the latest
|
730
|
+
example of the difficulties sustaining science blogging infrastructure.</p><p>Grant
|
731
|
+
funding is a well-established strategy to pay for Open Science activities,
|
732
|
+
but has two major limitations: a) it is not a good fit for the long tail of
|
733
|
+
science (Front Matter for example is not (yet) a non-profit organization because
|
734
|
+
the time and money required to start a non-profit in Germany are far from
|
735
|
+
trivial), and b) grant funding likes to pay for innovation and research, getting
|
736
|
+
funding for open scholarly infrastructure is much harder.</p><p>Of course
|
737
|
+
Front Matter is open for startup funding for the <em>Front Matter Gazette</em>,
|
738
|
+
but it should not be a requirement to get the <em>Gazette</em> started, and
|
739
|
+
I can not promise any financial returns for an investment.</p><p>Paying even
|
740
|
+
a small fee of 5 € per month for a useful Open Science resource can be a hurdle,
|
741
|
+
as <a href=\"https://blog.impactstory.org/subscription-announcement\">Impactstory
|
742
|
+
can attest</a>. That is why we offer a no-questions-asked fee waiver, and
|
743
|
+
why we start the Gazette as an experiment where we don''t know the outcome
|
744
|
+
yet.</p><h3 id=\"will-the-front-matter-gazette-work\">Will the Front Matter
|
745
|
+
Gazette work?</h3><p>Only time will tell whether the Gazette can attract enough
|
746
|
+
readers to become a sustainable operation, and I will work on the Gazette
|
747
|
+
until 2024 to make that call. The <a href=\"https;//ghost.org\">Ghost publishing
|
748
|
+
platform</a> powering this blog since 2021 is for people who believe in this
|
749
|
+
vision (mostly in domains other than science):</p><blockquote>Ghost is a powerful
|
750
|
+
app for new-media creators to publish, share, and grow a business around their
|
751
|
+
content. It comes with modern tools to build a website, publish content, send
|
752
|
+
newsletters & offer paid subscriptions to members. – <a href=\"https://ghost.org/\">Ghost
|
753
|
+
Homepage</a></blockquote><p>Future plans for the <em>Front Matter Gazette</em>
|
754
|
+
in case of a successful start focus on expanding the coverage – five stories
|
755
|
+
a week is not even the tip of the iceberg of what''s happening every week
|
756
|
+
in scholarship.</p><h3 id=\"what-is-the-relationship-to-the-rogue-scholar\">What
|
757
|
+
is the relationship to the Rogue Scholar?</h3><p><a href=\"https://rogue-scholar.org/\">The
|
758
|
+
Rogue Scholar</a> is a science blog archive that I am working on and plan
|
759
|
+
to launch in Q2 2023. Making sure that science blogs can be found over time
|
760
|
+
with the help of full-text search, DOIs plus metadata, and long-term archiving
|
761
|
+
is the first critical step. Using this open content in creative ways is the
|
762
|
+
next step, and curation is one important aspect that I try to start addressing
|
763
|
+
with the <em>Front Matter Gazette</em>. The <em>Front Matter Gazette</em>
|
764
|
+
will highlight all kinds of scholarly content, not just blogs, and not only
|
765
|
+
content archived in the Rogue Scholar, but there are of course synergies that
|
766
|
+
I will try to explore.</p><h3 id=\"what-is-in-the-first-issue-of-the-front-matter-gazette\">What
|
767
|
+
is in the first issue of the Front Matter Gazette?</h3><p>In the February
|
768
|
+
1st issue I will talk about Neanderthal families, ChatGPT in science publishing,
|
769
|
+
the Tidyverse, eradicating an infectious disease, and medieval manuscripts.</p>
|
770
|
+
","tags":["News"],"language":"en"},{"id":"https://doi.org/10.53731/wa7k5-v4t16","short_id":"wneyvxe4","url":"https://blog.front-matter.io/posts/starting-the-rogue-scholar-opml-feed/","title":"Starting
|
771
|
+
the Rogue Scholar OPML Feed","summary":"While the launch of the Rogue Scholar
|
772
|
+
blog archive is still a few months away (happening in the second quarter of
|
773
|
+
this year), I want to give an update on the ongoing work.The Rogue Scholar
|
774
|
+
blog archive will...","date_published":"2023-03-22T10:42:17Z","date_modified":"2023-03-22T10:42:17Z","authors":[{"url":"https://orcid.org/0000-0003-1419-2405","name":"Martin
|
775
|
+
Fenner"}],"image":"https://images.unsplash.com/photo-1611864581049-aca018410b97?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDQzfHxmZWVkfGVufDB8fHx8MTY3OTQ3NDc2NQ&ixlib=rb-4.0.3&q=80&w=2000","content_html":"
|
776
|
+
<p><img src=\"https://images.unsplash.com/photo-1611864581049-aca018410b97?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDQzfHxmZWVkfGVufDB8fHx8MTY3OTQ3NDc2NQ&ixlib=rb-4.0.3&q=80&w=2000\"></p><p>While
|
777
|
+
the launch of the <a href=\"https://rogue-scholar.org/\">Rogue Scholar</a>
|
778
|
+
blog archive is still a few months away (happening in the second quarter of
|
779
|
+
this year), I want to give an update on the ongoing work.</p><p>The <em>Rogue
|
780
|
+
Scholar</em> blog archive will improve science blogs in important ways,<br>including
|
781
|
+
full-text search, DOIs and metadata, and long-term archiving. The central
|
782
|
+
piece of the underlying infrastructure is the <a href=\"https://inveniosoftware.org/products/rdm/\">InvenioRDM
|
783
|
+
</a>open source repository software. Front Matter is one of the organizations
|
784
|
+
helping with InvenioRDM development. For the <em>Rogue Scholar,</em> the specific
|
785
|
+
work needed includes the following:</p><h3 id=\"support-for-rss-feeds\">Support
|
786
|
+
for RSS Feeds</h3><p>All blogs provide RSS feeds, which will be central to
|
787
|
+
automatically fetching metadata and content for the <em>Rogue Scholar</em>.
|
788
|
+
RSS is not built into InvenioRDM and is not needed by most organizations planning
|
789
|
+
to run InvenioRDM. I will therefore build a separate service for this functionality,
|
790
|
+
integrating with InvenioRDM via its REST API. For a blog to be archived and
|
791
|
+
indexed in the <em>Rogue Scholar</em>, users will use this RSS service, providing
|
792
|
+
basic information such as RSS feed URL, language, license, and contact person
|
793
|
+
– basically the information collected for the <em>Rogue Scholar</em> <a href=\"https://jvinjjenjik.typeform.com/to/uxgAsHPl?typeform-source=rogue-scholar.org\">waitlist</a>
|
794
|
+
(feel free to sign up your blog if you haven''t already).</p><p>Next Tuesday
|
795
|
+
I will publish an <a href=\"https://en.wikipedia.org/wiki/OPML\">OPML</a>
|
796
|
+
(Outline Processor Markup Language) file with all blogs on the <em>Rogue Scholar</em>
|
797
|
+
waitlist. OPML is the standard for importing and exporting lists of blogs,
|
798
|
+
e.g. when switching from one RSS reader to another. It is a natural fit for
|
799
|
+
managing blogs in <em>Rogue Scholar</em>, and hopefully helps people sign
|
800
|
+
up for interesting science blogs they want to read. If you are on the <em>Rogue
|
801
|
+
Scholar </em>waitlist, please make sure your RSS Feed URL and Home Page URL
|
802
|
+
are correct, and – if you haven''t done so already – pick one (and only one)
|
803
|
+
of the top-level categories from the <a href=\"https://www.oecd.org/science/inno/38235147.pdf\">OECD
|
804
|
+
Fields of Science and Technology</a>:</p><ul><li>Natural Sciences</li><li>Engineering
|
805
|
+
and Technology</li><li>Medical and Health Sciences</li><li>Agricultural Sciences</li><li>Social
|
806
|
+
Sciences</li><li>Humanities</li></ul><p>The OPML file (and your RSS reader
|
807
|
+
if you import that file) will group science blogs into these categories. Many
|
808
|
+
blogs fall into more than one category, but that isn''t supported by OPML.
|
809
|
+
</p><h3 id=\"hosting-rogue-scholar-infrastructure\">Hosting Rogue Scholar
|
810
|
+
infrastructure</h3><p>There are <a href=\"https://inveniordm.docs.cern.ch/install/\">several
|
811
|
+
ways</a> to run InvenioRDM repository software, obviously depending on the
|
812
|
+
resources available at the hosting organization, and the size and complexity
|
813
|
+
of the repository. A small data repository for a university department has
|
814
|
+
different needs than <a href=\"https://zenodo.org/\">Zenodo</a>, one of the
|
815
|
+
most popular generalist repositories with almost three million records. The
|
816
|
+
<em>Rogue Scholar</em> sits in the middle, a small to medium-sized repository,
|
817
|
+
anticipating 2,000 to 20,000 blog posts twelve months after launch. InvenioRDM
|
818
|
+
relies on <a href=\"https://www.docker.com/\">Docker</a> and Kubernetes for
|
819
|
+
running production services. This makes sense for large instances such as
|
820
|
+
Zenodo but adds unnecessary complexity to smaller instances such as the <em>Rogue
|
821
|
+
Scholar</em>.</p><p>After a substantial amount of deliberation and discussion,
|
822
|
+
I decided to use a different approach for the <em>Rogue Scholar</em>, and
|
823
|
+
this might potentially be of interest to other organizations planning to use
|
824
|
+
InvenioRDM:</p><ul><li>Using virtual machines instead of Docker containers</li><li>Automation
|
825
|
+
of virtual machine building with <a href=\"https://www.packer.io/\">Packer</a>
|
826
|
+
and <a href=\"https://www.ansible.com/\">Ansible</a></li><li>Hosting of virtual
|
827
|
+
machines by cloud provider <a href=\"https://www.digitalocean.com/\">DigitalOcean</a>,
|
828
|
+
fundamentally similar to hosting a Wordpress or Ghost blog</li><li>Making
|
829
|
+
the automation generic to also work for other InvenioRDM instances, and other
|
830
|
+
infrastructure providers, e.g. <a href=\"https://www.openstack.org/\">Openstack</a></li></ul><p>This
|
831
|
+
will be the focus of my work in the next three months, and luckily I have
|
832
|
+
learned a lot about infrastructure automation in my previous jobs at <a href=\"https://plos.org/\">PLOS</a>
|
833
|
+
and <a href=\"https://datacite.org/\">DataCite</a>.</p><h3 id=\"support-for-crossref-doi-registration\">Support
|
834
|
+
for Crossref DOI registration</h3><p>By default, InvenioRDM uses DataCite
|
835
|
+
DOIs, but <em>Rogue Scholar</em> will use Crossref DOIs for blogs that don''t
|
836
|
+
already use DOIs. The Crossref pricing is much more favorable for startups
|
837
|
+
such as Front Matter, and for annual DOI registration numbers that at least
|
838
|
+
initially will be in the 100s or low 1000s. I spent a good part of January
|
839
|
+
and February writing a Python scholarly metadata conversion library that I
|
840
|
+
released two weeks ago (<a href=\"https://pypi.org/project/commonmeta-py/\">commonmeta-py</a>).
|
841
|
+
Among other things, commonmeta-py can read and write Crossref metadata and
|
842
|
+
can enable Crossref DOI registrations in InvenioRDM – which is written in
|
843
|
+
Python (and Javascript for the frontend).</p><p>As always, reach out to me
|
844
|
+
with questions and comments.</p> ","tags":[],"language":"en"},{"id":"https://doi.org/10.53731/cbvm43q-qdk3s1s","short_id":"nodz2pdp","url":"https://blog.front-matter.io/posts/science-blog-archive-waitlist/","title":"Sign
|
845
|
+
up for the science blog archive waitlist","summary":"The science blog archive
|
846
|
+
that I have started to work on (see previous posts) finally has a name: the
|
847
|
+
Rogue Scholar. I picked this name because I liked the description in the Urban
|
848
|
+
Dictionary.A person with...","date_published":"2023-01-02T11:31:52Z","date_modified":"2023-01-02T11:31:52Z","authors":[{"url":"https://orcid.org/0000-0003-1419-2405","name":"Martin
|
849
|
+
Fenner"}],"image":"https://images.unsplash.com/photo-1577046823799-58b2d217d508?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDZ8fGhhcHB5JTIwbmV3JTIweWVhcnxlbnwwfHx8fDE2NzI2NTY4MzQ&ixlib=rb-4.0.3&q=80&w=2000","content_html":"
|
850
|
+
<p><img src=\"https://images.unsplash.com/photo-1577046823799-58b2d217d508?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDZ8fGhhcHB5JTIwbmV3JTIweWVhcnxlbnwwfHx8fDE2NzI2NTY4MzQ&ixlib=rb-4.0.3&q=80&w=2000\"></p><p>The
|
851
|
+
science blog archive that I have started to work on (<a href=\"https://doi.org/10.53731/eyf75cj-jsgv26c\">see
|
852
|
+
previous posts</a>) finally has a name: the <em>Rogue Scholar</em>. I picked
|
853
|
+
this name because I liked the description in the <a href=\"https://www.urbandictionary.com/define.php?term=rogue%20scholar\">Urban
|
854
|
+
Dictionary</a>.</p><blockquote>A person with extensive knowledge pertaining
|
855
|
+
to various subject matters that extends beyond formal education. This person
|
856
|
+
often <strong>gathers</strong> knowledge from various sources, such as media,
|
857
|
+
friends, casual reading or the internet.</blockquote><p>And I started a waitlist
|
858
|
+
for people interested in having their science blog archived in the <em>Rogue
|
859
|
+
Scholar</em>. There is still a lot of work to do, but I hope to launch the
|
860
|
+
archive in the second quarter of 2023 with these core features:</p><ul><li>based
|
861
|
+
on the <a href=\"https://inveniordm.docs.cern.ch/\">InvenioRDM</a> open source
|
862
|
+
software, hosted by Front Matter</li><li>free to archive 50 blog posts per
|
863
|
+
year. For larger blogs or a backfile of several years, the Rogue Scholar will
|
864
|
+
charge a one-time fee of 1 € per blog post, and I have started to work on
|
865
|
+
securing additional funding for this.</li><li>Full-text search of blog content,
|
866
|
+
typically not available on self-hosted blogs</li><li>DOI registration for
|
867
|
+
blog posts, facilitating discovery and integration of blogs into the scholarly
|
868
|
+
record</li><li>free to read and reuse forever, using the Creative Commons
|
869
|
+
Attribution (<a href=\"https://creativecommons.org/licenses/by/4.0/\">CC-BY</a>)
|
870
|
+
license</li><li>initially support English and German language posts</li></ul><p>The
|
871
|
+
form to sign up for the waitlist is available <a href=\"https://jvinjjenjik.typeform.com/to/uxgAsHPl\">here</a>.</p>
|
872
|
+
","tags":["News"],"language":"en"},{"id":"https://doi.org/10.53731/a0d9m3n-n7r8h0m","short_id":"3ng2zrg1","url":"https://blog.front-matter.io/posts/guidelines-for-scholarly-blogs/","title":"Guidelines
|
873
|
+
for Scholarly Blogs","summary":"These guidelines are recommendations for authors
|
874
|
+
of scholarly blogs to help with long-term archiving, discoverability, and
|
875
|
+
citation of blog content.They are modeled after the publication A Data Citation...","date_published":"2023-02-06T11:52:24Z","date_modified":"2023-02-06T11:52:24Z","authors":[{"url":"https://orcid.org/0000-0003-1419-2405","name":"Martin
|
876
|
+
Fenner"}],"image":"https://images.unsplash.com/photo-1584631277142-0ca0cfc76aec?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDZ8fGd1aWRlbGluZXxlbnwwfHx8fDE2NzU2ODM0NDc&ixlib=rb-4.0.3&q=80&w=2000","content_html":"
|
877
|
+
<p><img src=\"https://images.unsplash.com/photo-1584631277142-0ca0cfc76aec?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDZ8fGd1aWRlbGluZXxlbnwwfHx8fDE2NzU2ODM0NDc&ixlib=rb-4.0.3&q=80&w=2000\"></p><p>These
|
878
|
+
guidelines are recommendations for authors of scholarly blogs to help with
|
879
|
+
long-term archiving, discoverability, and citation of blog content.<br>They
|
880
|
+
are modeled after the publication <a href=\"https://doi.org/10.1038/s41597-019-0031-8\">A
|
881
|
+
Data Citation Roadmap for Scholarly Data Repositories</a>, where many of the
|
882
|
+
same guidelines apply, and where I was the first author and <a href=\"https://force11.org/group/data-citation-implementation-group/\">co-chair
|
883
|
+
of the corresponding Force11 working group.</a></p><p>These guidelines focus
|
884
|
+
on the required or recommended work for scholarly blog authors. For scholarly
|
885
|
+
blog archives such as the <a href=\"https://rogue-scholar.org\">Rogue Scholar</a>,
|
886
|
+
additional guidelines are in development.</p><!--kg-card-begin: html--><table>\n<thead>\n<tr>\n<th>Level</th>\n<th
|
887
|
+
style=\"text-align: right\">#</th>\n<th>Guideline</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>Required</td>\n<td
|
888
|
+
style=\"text-align: right\">1</td>\n<td>The full-text content <em>must</em>
|
889
|
+
be made available via public RSS feed (in RSS, Atom or JSON Feed format).</td>\n</tr>\n<tr>\n<td>Required</td>\n<td
|
890
|
+
style=\"text-align: right\">2</td>\n<td>Each blog post in the RSS feed <em>must</em>
|
891
|
+
have a title, author(s), and publication date.</td>\n</tr>\n<tr>\n<td>Required</td>\n<td
|
892
|
+
style=\"text-align: right\">3</td>\n<td>Each blog post <em>must</em> have
|
893
|
+
a URL that resolves to a public landing page specific for that blog post.</td>\n</tr>\n<tr>\n<td>Required</td>\n<td
|
894
|
+
style=\"text-align: right\">4</td>\n<td>The full-text content <em>must</em>
|
895
|
+
be made available via a Creative Commons Attribution (CC-BY) license.</td>\n</tr>\n<tr>\n<td>Required</td>\n<td
|
896
|
+
style=\"text-align: right\">5</td>\n<td>The blog must provide documentation
|
897
|
+
about long-term archiving, discoverability, and citation.</td>\n</tr>\n<tr>\n<td>Recommended</td>\n<td
|
898
|
+
style=\"text-align: right\">6</td>\n<td>Each blog post in the RSS feed <em>should</em>
|
899
|
+
have a persistent identifier, description, language, and last updated date.</td>\n</tr>\n<tr>\n<td>Recommended</td>\n<td
|
900
|
+
style=\"text-align: right\">7</td>\n<td>The landing page <em>should</em> include
|
901
|
+
metadata required for citation, and ideally also metadata facilitating discovery,
|
902
|
+
in human-readable and machine-readable format.</td>\n</tr>\n<tr>\n<td>Recommended</td>\n<td
|
903
|
+
style=\"text-align: right\">8</td>\n<td>The machine-readable metadata <em>should</em>
|
904
|
+
use schema.org markup in JSON-LD format.</td>\n</tr>\n<tr>\n<td>Recommended</td>\n<td
|
905
|
+
style=\"text-align: right\">9</td>\n<td>Metadata <em>should</em> be made available
|
906
|
+
via HTML meta tags to facilitate use by reference managers.</td>\n</tr>\n<tr>\n<td>Recommended</td>\n<td
|
907
|
+
style=\"text-align: right\">10</td>\n<td>Metadata <em>should</em> be made
|
908
|
+
available for download in BibTeX and/or another standard bibliographic format.</td>\n</tr>\n</tbody>\n</table><!--kg-card-end:
|
909
|
+
html--><p>The requirement for full-text content via RSS feed and with a CC-BY
|
910
|
+
license comes from the need to make archiving and indexing as simple (and
|
911
|
+
cheap) as possible. Dealing with multiple licenses, private feeds, and private
|
912
|
+
content adds an extra level of complexity and is not supportive of Open Science.</p><p>Metadata
|
913
|
+
via HTML meta tags and JSON-LD (using schema.org markup) are two main strategies
|
914
|
+
to embed metadata in web pages, to support reference managers but also indexers.
|
915
|
+
Schema.org is simpler to work with, e.g. for more complex author information
|
916
|
+
such as separate given and family names, author identifiers such as ORCID,
|
917
|
+
and affiliation information. On the other hand, reference managers and Google
|
918
|
+
Scholar currently use HTML meta tags, and it is sometimes easier to add this
|
919
|
+
information to a blog.</p><p>Registration of DOIs as other persistent identifiers
|
920
|
+
for blog posts is something that I want to provide via the Rogue Scholar archive,
|
921
|
+
as the effort required is not trivial. The information required (mainly title,
|
922
|
+
author(s), publication date, and URL) is readily available via the RSS feed.
|
923
|
+
Of course, displaying these DOIs on the blog is recommended, and for the DOIs
|
924
|
+
to resolve to the blog itself rather than the blog archive at the Rogue Scholar
|
925
|
+
or elsewhere.</p><p>The recommended or optional metadata for science blog
|
926
|
+
posts is of course a big topic that needs more discussion. Description, language,
|
927
|
+
and last updated date seem desired and readily available. References used
|
928
|
+
in blog posts would be fantastic to be included in the metadata, but there
|
929
|
+
is currently no easy and standard way of doing this. For better discoverability,
|
930
|
+
it would make sense to provide geo coordinates and/or temporal information,
|
931
|
+
and all blogs would benefit from using subject classification such as the
|
932
|
+
<a href=\"https://www.oecd.org/science/inno/38235147.pdf\">OECD Fields of
|
933
|
+
Science and Technology</a>, but all this would require significantly more
|
934
|
+
effort.</p><p>These guidelines are a work in progress and are made available
|
935
|
+
as part of the <a href=\"https://docs.rogue-scholar.org/guidelines\">Rogue
|
936
|
+
Scholar Documentation</a>. Feedback is greatly appreciated.</p> ","tags":["Feature"],"language":"en"},{"id":"https://doi.org/10.53731/4nwxn-frt36","short_id":"1jgo8yel","url":"https://blog.front-matter.io/posts/does-it-compose/","title":"Does
|
937
|
+
it compose?","summary":"One question I have increasingly asked myself in the
|
938
|
+
past few years. Meaning Can I run this open source software using Docker containers
|
939
|
+
and a Docker Compose file?As the Docker project turned ten this...","date_published":"2023-05-16T11:36:56Z","date_modified":"2023-05-16T11:36:56Z","authors":[{"url":"https://orcid.org/0000-0003-1419-2405","name":"Martin
|
940
|
+
Fenner"}],"image":"https://images.unsplash.com/photo-1523351964962-1ee5847816c3?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=M3wxMTc3M3wwfDF8c2VhcmNofDUzfHxjb250YWluZXJ8ZW58MHx8fHwxNjg0MjMyMTQ0fDA&ixlib=rb-4.0.3&q=80&w=2000","content_html":"
|
941
|
+
<p><img src=\"https://images.unsplash.com/photo-1523351964962-1ee5847816c3?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=M3wxMTc3M3wwfDF8c2VhcmNofDUzfHxjb250YWluZXJ8ZW58MHx8fHwxNjg0MjMyMTQ0fDA&ixlib=rb-4.0.3&q=80&w=2000\"></p><p>One
|
942
|
+
question I have increasingly asked myself in the past few years. Meaning </p><blockquote>Can
|
943
|
+
I run this open source software using Docker containers and a Docker Compose
|
944
|
+
file?</blockquote><p>As the Docker project <a href=\"https://snyk.io/blog/the-docker-project-turns-10/\">turned
|
945
|
+
ten this spring</a>, it has become standard practice to distribute open source
|
946
|
+
software via Docker images and to provide a <a href=\"https://docs.docker.com/compose/\">Docker
|
947
|
+
Compose</a> file to run the software together with other dependencies. The
|
948
|
+
<a href=\"https://github.com/docker/awesome-compose\">Awesome Compose</a>
|
949
|
+
project has collected many examples, and all you need is a <code>docker-compose.yml</code>file
|
950
|
+
and a recent installation of Docker, e.g. <a href=\"https://www.docker.com/products/docker-desktop/\">Docker
|
951
|
+
Desktop</a>. Be aware that Docker Compose has evolved over the years. It started
|
952
|
+
out as a dedicated Python application but was later integrated into the Docker
|
953
|
+
application (written in Go) as Compose V2.</p><p>Docker and Docker Compose
|
954
|
+
allow you to run pretty complex applications without first addressing a long
|
955
|
+
list of requirements (which might conflict with other software you have installed),
|
956
|
+
or needing a long and complex build step where many things can go wrong. For
|
957
|
+
example a self-hosted instance of Supabase (a hosted Postgres database with
|
958
|
+
additional features) that I installed last week following <a href=\"https://supabase.com/docs/guides/self-hosting/docker\">these
|
959
|
+
instructions</a>.</p><p>An important open source project that I am involved
|
960
|
+
in is <a href=\"https://inveniordm.docs.cern.ch/\">InvenioRDM</a>, the turn-key
|
961
|
+
research data management repository. InvenioRDM started in 2019, with a first
|
962
|
+
production-suitable version in August 2021, and the <a href=\"https://inveniosoftware.org/products/rdm/#status\">next
|
963
|
+
major goal </a>is to have the large and popular <a href=\"https://zenodo.org/\">Zenodo</a>
|
964
|
+
repository running on top of InvenioRDM. Zenodo <a href=\"https://blog.zenodo.org/2023/05/08/2023-05-08-10years/\">turned
|
965
|
+
ten last week</a>, a few weeks after Docker. Interestingly, my personal tenth
|
966
|
+
anniversary was last year in May as I became a full-time software developer
|
967
|
+
and left academic medicine as a medical doctor treating cancer patients in
|
968
|
+
<a href=\"https://doi.org/10.53731/r294649-6f79289-8cw2j\">May 2012</a>.</p><p>Unfortunately,
|
969
|
+
InvenioRDM \"doesn''t compose\" yet. It is very close, but there are no ready-made
|
970
|
+
Docker images to download, and the <a href=\"https://inveniordm.docs.cern.ch/install/\">installation
|
971
|
+
instructions</a> start with installing a Python command-line tool (invenio-cli).
|
972
|
+
So if you have 1-2 hours to play with InvenioRDM and get a first impression,
|
973
|
+
there is no official solution from the InvenioRDM project yet. For this reason,
|
974
|
+
I started the <a href=\"https://github.com/front-matter/docker-invenio-rdm\">docker-invenio-rdm</a>
|
975
|
+
repository on Github. It contains a Docker Compose file that uses pre-built
|
976
|
+
Docker images, and using that file with a <code>docker compose up</code>command
|
977
|
+
on your local computer should give you a running InvenioRDM within 15 minutes:</p><figure
|
978
|
+
class=\"kg-card kg-image-card\"><img src=\"https://blog.front-matter.io/content/images/2023/05/Bildschirmfoto-2023-05-11-um-10.37.55.png\"
|
979
|
+
class=\"kg-image\" alt loading=\"lazy\" width=\"2000\" height=\"1210\" srcset=\"https://blog.front-matter.io/content/images/size/w600/2023/05/Bildschirmfoto-2023-05-11-um-10.37.55.png
|
980
|
+
600w, https://blog.front-matter.io/content/images/size/w1000/2023/05/Bildschirmfoto-2023-05-11-um-10.37.55.png
|
981
|
+
1000w, https://blog.front-matter.io/content/images/size/w1600/2023/05/Bildschirmfoto-2023-05-11-um-10.37.55.png
|
982
|
+
1600w, https://blog.front-matter.io/content/images/2023/05/Bildschirmfoto-2023-05-11-um-10.37.55.png
|
983
|
+
2193w\" sizes=\"(min-width: 720px) 720px\"></figure><p>I started this recently
|
984
|
+
and obviously want to move forward in two directions:</p><ul><li>fine-tune
|
985
|
+
the initial configuration to provide a great initial experience with InvenioRDM,
|
986
|
+
e.g. making it easy to <a href=\"https://inveniordm.docs.cern.ch/develop/topics/theming/\">theme</a>
|
987
|
+
the InvenioRDM instance</li><li>make this an official part of the InvenioRDM
|
988
|
+
project, extending the <a href=\"https://github.com/inveniosoftware/docker-invenio\">docker-invenio</a>
|
989
|
+
GitHub repository that provides Docker base images for InvenioRDM and other
|
990
|
+
projects using the Invenio software.</li></ul><p>But of course, Docker Compose
|
991
|
+
is not the answer to all questions regarding running Docker-based infrastructure.
|
992
|
+
For production environments, most people shy away from using Docker Compose.
|
993
|
+
The reasons for that and the alternatives will be the topic of a future blog
|
994
|
+
post (spoiler: there is exciting news).</p><p>Docker Compose also needs more
|
995
|
+
work to be set up correctly for development environments. It is a common practice
|
996
|
+
and a workflow I used while working at DataCite (where we launched Docker-based
|
997
|
+
infrastructure in 2016), but for now, the easiest way to set up InvenioRDM
|
998
|
+
development environments is using the <a href=\"https://inveniordm.docs.cern.ch/install/\">invenio-cli
|
999
|
+
tool with a local development environment</a>.</p><p>Please reach out to me
|
1000
|
+
with feedback on running Docker Compose for InvenioRDM (use the <a href=\"https://github.com/front-matter/docker-invenio-rdm/discussions\">discussions</a>
|
1001
|
+
feature in the GitHub repo), or if you have questions about running InvenioRDM
|
1002
|
+
in production.</p> ","tags":["News"],"language":"en"},{"id":"https://doi.org/10.53731/fawv321-14359c4","short_id":"56gl1qd9","url":"https://blog.front-matter.io/posts/announcing-commonmeta-ruby/","title":"Announcing
|
1003
|
+
commonmeta-ruby","summary":"Following recent announcements of the commonmeta
|
1004
|
+
standard for scholarly metadata and a Python package that converts several
|
1005
|
+
metadata formats (commonmeta-py), today I am happy to announce commonmeta-ruby,
|
1006
|
+
a...","date_published":"2023-03-20T14:54:00Z","date_modified":"2023-03-22T12:32:52Z","authors":[{"url":"https://orcid.org/0000-0003-1419-2405","name":"Martin
|
1007
|
+
Fenner"}],"image":"https://images.unsplash.com/photo-1676284572206-2501ff5c6956?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDUwfHxiaWtlJTIwbSVDMyVCQ25zdGVyfGVufDB8fHx8MTY3OTMyMTU4MA&ixlib=rb-4.0.3&q=80&w=2000","content_html":"
|
1008
|
+
<p><img src=\"https://images.unsplash.com/photo-1676284572206-2501ff5c6956?crop=entropy&cs=tinysrgb&fit=max&fm=jpg&ixid=MnwxMTc3M3wwfDF8c2VhcmNofDUwfHxiaWtlJTIwbSVDMyVCQ25zdGVyfGVufDB8fHx8MTY3OTMyMTU4MA&ixlib=rb-4.0.3&q=80&w=2000\"></p><p>Following
|
1009
|
+
recent announcements of the <a href=\"https://commonmeta.org\">commonmeta</a>
|
1010
|
+
standard for scholarly metadata and a Python package that converts several
|
1011
|
+
metadata formats (<a href=\"https://github.com/front-matter/commonmeta-py\">commonmeta-py</a>),
|
1012
|
+
today I am happy to announce <a href=\"https://github.com/front-matter/commonmeta-ruby\">commonmeta-ruby</a>,
|
1013
|
+
a Ruby gem and command-line tool to convert scholarly metadata using commonmeta
|
1014
|
+
as the internal format. commonmeta-ruby is based on the <a href=\"https://github.com/datacite/bolognese\">bolognese
|
1015
|
+
Ruby library</a> that I started a few ago while working at DataCite, but is
|
1016
|
+
a major rewrite that uses commonmeta as its intermediary conversion format.</p><p>Originally
|
1017
|
+
planned for later this year, I decided to speed up the release as Ruby version
|
1018
|
+
2.x (currently 2.7.7) reaches its <a href=\"https://endoflife.date/ruby\">end
|
1019
|
+
of life</a> this month, and <a href=\"https://rubygems.org/gems/briard\">briard</a>
|
1020
|
+
(the fork I wrote to support additional metadata conversions such as <a href=\"https://citation-file-format.github.io/\">Citation
|
1021
|
+
File Format</a> and Crossref DOI registrations) didn''t fully work with Ruby
|
1022
|
+
3.x. In addition to supporting Ruby 3.x and validating with the <a href=\"https://commonmeta.org/schema\">commonmeta
|
1023
|
+
JSON Schema</a>, commonmeta-ruby dropped support for DataCite XML. The DataCite
|
1024
|
+
REST API has always been a JSON API, and DOI registration using DataCite XML
|
1025
|
+
for many years has used JSON under the hood. Metadata conversion using XML
|
1026
|
+
is painful, and focussing on JSON metadata simplifies further development.</p><p>The
|
1027
|
+
next steps for commonmeta are:</p><ul><li>Refine the commonmeta-py and commonmeta-ruby
|
1028
|
+
libraries by adding tests and real-world implementations (such as the DOI
|
1029
|
+
registration for this blog post, which was done using commonmeta-ruby)</li><li>Work
|
1030
|
+
towards a commonmeta v1.0 JSON Schema</li><li>Add support for bibliographies
|
1031
|
+
(lists of resources) to commonmeta.</li><li>Commonmeta implementations in
|
1032
|
+
additional languages, in particular Javascript/Typescript.</li></ul> ","tags":["News"],"language":"en"}]}'
|
1033
|
+
recorded_at: Sun, 04 Jun 2023 13:34:34 GMT
|
1034
|
+
recorded_with: VCR 6.1.0
|