sawzall 0.1.0.pre → 0.1.0.pre2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Cargo.lock +2 -1
- data/ext/sawzall/Cargo.toml +2 -0
- data/ext/sawzall/src/html_to_plain.rs +6 -3
- data/lib/sawzall/extension.rb +15 -0
- data/lib/sawzall/version.rb +1 -1
- data/lib/sawzall.rb +1 -1
- data/sawzall.gemspec +2 -1
- metadata +7 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 26ff46ad7cf1baba47468b3a6c5dd9547c8cae75c5032fe1864d3cdea4f59f47
|
4
|
+
data.tar.gz: 045c2459878b3311f2580bad454fbce664a1b96b2c6f2a9f5213ae303a7029c6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 743ef38e41abdd1cc723b50dfe98b8ba3eed3a5943ba4c060872282b85e4ea38db1a56d27b3f0b9d5b190be7ab681e6d6d10958a26b0189ed466690a1f08aff2
|
7
|
+
data.tar.gz: a69ec24f93d97ddf16cfa54bfc14dcb3fee54e056adcfe42cbf1802c44529ce35c8fc3442dcda748a92ca94bfa7d36cfce8f77d92c71c05ef262a74275d8543c
|
data/Cargo.lock
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# This file is automatically @generated by Cargo.
|
2
2
|
# It is not intended for manual editing.
|
3
|
-
version =
|
3
|
+
version = 3
|
4
4
|
|
5
5
|
[[package]]
|
6
6
|
name = "aho-corasick"
|
@@ -516,6 +516,7 @@ name = "sawzall"
|
|
516
516
|
version = "0.1.0"
|
517
517
|
dependencies = [
|
518
518
|
"ego-tree",
|
519
|
+
"lazy_static",
|
519
520
|
"magnus",
|
520
521
|
"scraper",
|
521
522
|
]
|
data/ext/sawzall/Cargo.toml
CHANGED
@@ -5,11 +5,13 @@ edition = "2021"
|
|
5
5
|
authors = ["David Cornu <me@davidcornu.com>"]
|
6
6
|
license = "MIT"
|
7
7
|
publish = false
|
8
|
+
rust-version = "1.75.0"
|
8
9
|
|
9
10
|
[lib]
|
10
11
|
crate-type = ["cdylib"]
|
11
12
|
|
12
13
|
[dependencies]
|
13
14
|
ego-tree = "0.10.0"
|
15
|
+
lazy_static = "1.5.0"
|
14
16
|
magnus = { version = "0.7.1" }
|
15
17
|
scraper = { version = "0.23.1", features = ["atomic"] }
|
data/ext/sawzall/src/html_to_plain.rs
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
use ego_tree::iter::Edge;
|
2
|
+
use lazy_static::lazy_static;
|
2
3
|
use scraper::{ElementRef, Node};
|
3
|
-
use std::
|
4
|
+
use std::collections::HashSet;
|
4
5
|
|
5
6
|
/// Set of block-level elements extracted from [MDN][1]
|
6
7
|
///
|
@@ -41,8 +42,10 @@ const BLOCK_LEVEL_ELEMENTS: [&'static str; 33] = [
|
|
41
42
|
"ul",
|
42
43
|
];
|
43
44
|
|
44
|
-
|
45
|
-
|
45
|
+
lazy_static! {
|
46
|
+
static ref BLOCK_LEVEL_ELEMENTS_SET: HashSet<&'static str> =
|
47
|
+
BLOCK_LEVEL_ELEMENTS.iter().map(|el| *el).collect();
|
48
|
+
}
|
46
49
|
|
47
50
|
fn is_block_element(name: &str) -> bool {
|
48
51
|
BLOCK_LEVEL_ELEMENTS_SET.contains(&name)
|
data/lib/sawzall/extension.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Adapted from https://github.com/gjtorikian/commonmarker/blob/c12e5cbce128fe1863b7290a2403b7b247d79d2e/lib/commonmarker/extension.rb
|
4
|
+
begin
|
5
|
+
# native precompiled gems package shared libraries in <gem_dir>/lib/sawzall/<ruby_version>
|
6
|
+
# load the precompiled extension file
|
7
|
+
ruby_version = /\d+\.\d+/.match(RUBY_VERSION)
|
8
|
+
require_relative "#{ruby_version}/sawzall"
|
9
|
+
rescue LoadError
|
10
|
+
# fall back to the extension compiled upon installation.
|
11
|
+
# use "require" instead of "require_relative" because non-native gems will place C extension files
|
12
|
+
# in Gem::BasicSpecification#extension_dir after compilation (during normal installation), which
|
13
|
+
# is in $LOAD_PATH but not necessarily relative to this file (see nokogiri#2300)
|
14
|
+
require "sawzall/sawzall"
|
15
|
+
end
|
data/lib/sawzall/version.rb
CHANGED
data/lib/sawzall.rb
CHANGED
data/sawzall.gemspec
CHANGED
@@ -20,6 +20,7 @@ Gem::Specification.new do |spec|
|
|
20
20
|
|
21
21
|
spec.metadata["homepage_uri"] = spec.homepage
|
22
22
|
spec.metadata["source_code_uri"] = spec.homepage
|
23
|
+
spec.metadata["documentation_uri"] = "https://davidcornu.github.io/sawzall/"
|
23
24
|
spec.metadata["rubygems_mfa_required"] = "true"
|
24
25
|
|
25
26
|
# Specify which files should be added to the gem when it is released.
|
@@ -41,5 +42,5 @@ Gem::Specification.new do |spec|
|
|
41
42
|
spec.require_paths = ["lib"]
|
42
43
|
spec.extensions = ["ext/sawzall/extconf.rb"]
|
43
44
|
|
44
|
-
spec.add_dependency "rb_sys", "~> 0.9.
|
45
|
+
spec.add_dependency "rb_sys", "~> 0.9.111"
|
45
46
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sawzall
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0.pre
|
4
|
+
version: 0.1.0.pre2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Cornu
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 2025-05-12 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: rb_sys
|
@@ -15,14 +15,14 @@ dependencies:
|
|
15
15
|
requirements:
|
16
16
|
- - "~>"
|
17
17
|
- !ruby/object:Gem::Version
|
18
|
-
version: 0.9.
|
18
|
+
version: 0.9.111
|
19
19
|
type: :runtime
|
20
20
|
prerelease: false
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
22
22
|
requirements:
|
23
23
|
- - "~>"
|
24
24
|
- !ruby/object:Gem::Version
|
25
|
-
version: 0.9.
|
25
|
+
version: 0.9.111
|
26
26
|
description: |
|
27
27
|
Sawzall wraps the Rust scraper library (https://github.com/rust-scraper/scraper)
|
28
28
|
to make it easy to parse HTML documents and query them with CSS selectors.
|
@@ -42,6 +42,7 @@ files:
|
|
42
42
|
- ext/sawzall/src/html_to_plain.rs
|
43
43
|
- ext/sawzall/src/lib.rs
|
44
44
|
- lib/sawzall.rb
|
45
|
+
- lib/sawzall/extension.rb
|
45
46
|
- lib/sawzall/version.rb
|
46
47
|
- sawzall.gemspec
|
47
48
|
homepage: https://github.com/davidcornu/sawzall
|
@@ -50,6 +51,7 @@ licenses:
|
|
50
51
|
metadata:
|
51
52
|
homepage_uri: https://github.com/davidcornu/sawzall
|
52
53
|
source_code_uri: https://github.com/davidcornu/sawzall
|
54
|
+
documentation_uri: https://davidcornu.github.io/sawzall/
|
53
55
|
rubygems_mfa_required: 'true'
|
54
56
|
rdoc_options: []
|
55
57
|
require_paths:
|
@@ -65,7 +67,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
65
67
|
- !ruby/object:Gem::Version
|
66
68
|
version: 3.3.11
|
67
69
|
requirements: []
|
68
|
-
rubygems_version: 3.6.
|
70
|
+
rubygems_version: 3.6.2
|
69
71
|
specification_version: 4
|
70
72
|
summary: HTML parsing and querying with CSS selectors.
|
71
73
|
test_files: []
|